diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 455f46095..2ffe85fe4 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -25,13 +25,6 @@ jobs: fail-fast: false matrix: include: - - cuda: "124" - cuda_version: 12.4.1 - cudnn_version: "" - python_version: "3.11" - pytorch: 2.5.1 - torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" - dockerfile: "Dockerfile-base" - cuda: "124" cuda_version: 12.4.1 cudnn_version: "" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a43dbac41..8692496f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,16 +15,11 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 - axolotl_extras: vllm + axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" @@ -87,11 +82,6 @@ jobs: strategy: matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 09d9663a9..6180faf96 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -33,13 +33,6 @@ jobs: axolotl_extras: num_gpus: 2 nightly_build: "true" - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - num_gpus: 2 - nightly_build: "true" - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml index 4e61984fb..824c7e4f2 100644 --- a/.github/workflows/nightlies.yml +++ b/.github/workflows/nightlies.yml @@ -12,11 +12,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 124 
cuda_version: 12.4.1 python_version: "3.11" @@ -68,10 +63,10 @@ jobs: - cuda: 124 cuda_version: 12.4.1 python_version: "3.11" - pytorch: 2.5.1 + pytorch: 2.6.0 axolotl_extras: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 8a51153d6..b5dd50a3c 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -26,7 +26,7 @@ jobs: max-parallel: 2 matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0"] + pytorch_version: ["2.6.0", "2.7.0"] timeout-minutes: 20 steps: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f98d2bedd..9c983ad70 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,7 +52,7 @@ jobs: fail-fast: false matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0", "2.7.1"] + pytorch_version: ["2.6.0", "2.7.0", "2.7.1"] timeout-minutes: 20 steps: @@ -125,7 +125,7 @@ jobs: fail-fast: false matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0", "2.7.1"] + pytorch_version: ["2.6.0", "2.7.0", "2.7.1"] timeout-minutes: 20 steps: @@ -198,7 +198,7 @@ jobs: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" - pytorch: 2.6.0 + pytorch: 2.7.1 num_gpus: 1 axolotl_extras: - cuda: 126 @@ -252,18 +252,6 @@ jobs: python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 - axolotl_extras: llmcompressor - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - num_gpus: 1 - axolotl_extras: - - cuda: 126 - cuda_version: 12.6.3 - python_version: "3.11" - pytorch: 2.7.1 - num_gpus: 1 axolotl_extras: - cuda: 128 cuda_version: 12.8.1 diff --git a/README.md b/README.md index ec9fc9f7f..406781039 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Features: - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU - 
Python 3.11 -- PyTorch ≥2.5.1 +- PyTorch ≥2.6.0 ### Installation diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 848110a84..2c067f143 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -24,9 +24,9 @@ df_template = template_env.get_template("Dockerfile.jinja") df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), - "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"), - "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"), - "CUDA": os.environ.get("CUDA", "124"), + "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), + "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""), diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py index 357aa41ee..6955af013 100644 --- a/cicd/single_gpu.py +++ b/cicd/single_gpu.py @@ -24,9 +24,9 @@ df_template = template_env.get_template(dockerfile) df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), - "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"), - "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"), - "CUDA": os.environ.get("CUDA", "124"), + "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), + "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""), diff --git a/docs/docker.qmd b/docs/docker.qmd index 197185d88..5b238c520 100644 --- a/docs/docker.qmd +++ b/docs/docker.qmd @@ -36,7 +36,6 @@ Tags examples: - `main-base-py3.11-cu126-2.7.1` - `main-base-py3.11-cu126-2.6.0` - 
`main-base-py3.11-cu124-2.6.0` -- `main-base-py3.11-cu124-2.5.1` ## Main @@ -78,10 +77,9 @@ Tags examples: - `main-py3.11-cu126-2.7.1` - `main-py3.11-cu126-2.6.0` - `main-py3.11-cu124-2.6.0` -- `main-py3.11-cu124-2.5.1` - `main-latest` - `main-20250303-py3.11-cu124-2.6.0` -- `main-20250303-py3.11-cu124-2.5.1` +- `main-20250303-py3.11-cu126-2.6.0` - `0.10.1` ## Cloud diff --git a/docs/installation.qmd b/docs/installation.qmd index c905e93cd..0a29aedb9 100644 --- a/docs/installation.qmd +++ b/docs/installation.qmd @@ -15,7 +15,7 @@ This guide covers all the ways you can install and set up Axolotl for your envir - NVIDIA GPU (Ampere architecture or newer for `bf16` and Flash Attention) or AMD GPU - Python ≥3.11 -- PyTorch ≥2.5.1 +- PyTorch ≥2.6.0 ## Installation Methods {#sec-installation-methods} diff --git a/src/axolotl/__init__.py b/src/axolotl/__init__.py index 314d22279..f31a40b4d 100644 --- a/src/axolotl/__init__.py +++ b/src/axolotl/__init__.py @@ -4,4 +4,4 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package -__version__ = "0.11.0.dev" +__version__ = "0.11.0" diff --git a/src/axolotl/integrations/kd/README.md b/src/axolotl/integrations/kd/README.md index 4b15ad31d..5e35cf3d7 100644 --- a/src/axolotl/integrations/kd/README.md +++ b/src/axolotl/integrations/kd/README.md @@ -11,7 +11,7 @@ kd_ce_alpha: 0.1 kd_alpha: 0.9 kd_temperature: 1.0 -torch_compile: True # torch>=2.5.1, recommended to reduce vram +torch_compile: True # torch>=2.6.0, recommended to reduce vram datasets: - path: ... 
diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index e590dbdaa..9dc04c7b4 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -35,6 +35,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "deepseek_v3", "glm", "glm4", + "smollm3", ] diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py index 94df7cde8..323e0877d 100644 --- a/src/axolotl/utils/schemas/config.py +++ b/src/axolotl/utils/schemas/config.py @@ -627,7 +627,7 @@ class AxolotlInputConfig( torch_compile: Literal["auto"] | bool | None = Field( default=None, json_schema_extra={ - "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.5.1" + "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.6.0" }, ) torch_compile_backend: str | None = Field( @@ -1083,9 +1083,9 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig): def check_min_torch_version(self): if self.env_capabilities and self.env_capabilities.torch_version: torch_version = self.env_capabilities.torch_version - if version.parse(torch_version) < version.parse("2.5.1"): + if version.parse(torch_version) < version.parse("2.6.0"): LOG.warning( - f"torch=={torch_version} may not be supported in future versions. Please consider upgrading to torch>=2.5.1." + f"torch=={torch_version} is not supported. Please upgrade to torch>=2.6.0."
) return self diff --git a/tests/patched/test_validation.py b/tests/patched/test_validation.py index 55e25daf7..677512d3d 100644 --- a/tests/patched/test_validation.py +++ b/tests/patched/test_validation.py @@ -692,7 +692,7 @@ class TestValidation(BaseValidation): "bf16": True, "capabilities": {"bf16": False}, "env_capabilities": { - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, } ) @@ -1202,7 +1202,7 @@ class TestValidation(BaseValidation): cfg, capabilities=capabilities, env_capabilities=env_capabilities ) - env_capabilities = {"torch_version": "2.5.1"} + env_capabilities = {"torch_version": "2.6.0"} capabilities = {"bf16": False} _ = validate_config( cfg, capabilities=capabilities, env_capabilities=env_capabilities @@ -1244,7 +1244,7 @@ class TestTorchCompileValidation(BaseValidation): | minimal_cfg ) - env_capabilities = {"torch_version": "2.5.1"} + env_capabilities = {"torch_version": "2.6.0"} capabilities = {"bf16": True} updated_cfg = validate_config( cfg, capabilities=capabilities, env_capabilities=env_capabilities diff --git a/tests/test_validation_dataset.py b/tests/test_validation_dataset.py index ba142f3bf..1a4c97314 100644 --- a/tests/test_validation_dataset.py +++ b/tests/test_validation_dataset.py @@ -73,7 +73,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -128,7 +128,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -184,7 +184,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -241,7 +241,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, )