diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 455f46095..2ffe85fe4 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -25,13 +25,6 @@ jobs: fail-fast: false matrix: include: - - cuda: "124" - cuda_version: 12.4.1 - cudnn_version: "" - python_version: "3.11" - pytorch: 2.5.1 - torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" - dockerfile: "Dockerfile-base" - cuda: "124" cuda_version: 12.4.1 cudnn_version: "" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a43dbac41..8692496f1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,16 +15,11 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 - axolotl_extras: vllm + axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" @@ -87,11 +82,6 @@ jobs: strategy: matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 09d9663a9..6180faf96 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -33,13 +33,6 @@ jobs: axolotl_extras: num_gpus: 2 nightly_build: "true" - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - num_gpus: 2 - nightly_build: "true" - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml index 4e61984fb..824c7e4f2 100644 --- a/.github/workflows/nightlies.yml +++ b/.github/workflows/nightlies.yml @@ -12,11 +12,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - axolotl_extras: - cuda: 124 
cuda_version: 12.4.1 python_version: "3.11" @@ -68,10 +63,10 @@ jobs: - cuda: 124 cuda_version: 12.4.1 python_version: "3.11" - pytorch: 2.5.1 + pytorch: 2.6.0 axolotl_extras: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 8a51153d6..b5dd50a3c 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -26,7 +26,7 @@ jobs: max-parallel: 2 matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0"] + pytorch_version: ["2.6.0", "2.7.0"] timeout-minutes: 20 steps: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f98d2bedd..9c983ad70 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,7 +52,7 @@ jobs: fail-fast: false matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0", "2.7.1"] + pytorch_version: ["2.6.0", "2.7.0", "2.7.1"] timeout-minutes: 20 steps: @@ -125,7 +125,7 @@ jobs: fail-fast: false matrix: python_version: ["3.11"] - pytorch_version: ["2.5.1", "2.6.0", "2.7.0", "2.7.1"] + pytorch_version: ["2.6.0", "2.7.0", "2.7.1"] timeout-minutes: 20 steps: @@ -198,7 +198,7 @@ jobs: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" - pytorch: 2.6.0 + pytorch: 2.7.1 num_gpus: 1 axolotl_extras: - cuda: 126 @@ -252,18 +252,6 @@ jobs: python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 - axolotl_extras: llmcompressor - - cuda: 124 - cuda_version: 12.4.1 - python_version: "3.11" - pytorch: 2.5.1 - num_gpus: 1 - axolotl_extras: - - cuda: 126 - cuda_version: 12.6.3 - python_version: "3.11" - pytorch: 2.7.1 - num_gpus: 1 axolotl_extras: - cuda: 128 cuda_version: 12.8.1 diff --git a/README.md b/README.md index ec9fc9f7f..406781039 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Features: - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU - 
Python 3.11 -- PyTorch ≥2.5.1 +- PyTorch ≥2.6.0 ### Installation diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 848110a84..2c067f143 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -24,9 +24,9 @@ df_template = template_env.get_template("Dockerfile.jinja") df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), - "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"), - "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"), - "CUDA": os.environ.get("CUDA", "124"), + "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), + "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""), diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py index 357aa41ee..6955af013 100644 --- a/cicd/single_gpu.py +++ b/cicd/single_gpu.py @@ -24,9 +24,9 @@ df_template = template_env.get_template(dockerfile) df_args = { "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), - "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"), - "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"), - "CUDA": os.environ.get("CUDA", "124"), + "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), + "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""), diff --git a/docs/docker.qmd b/docs/docker.qmd index 197185d88..5b238c520 100644 --- a/docs/docker.qmd +++ b/docs/docker.qmd @@ -36,7 +36,6 @@ Tags examples: - `main-base-py3.11-cu126-2.7.1` - `main-base-py3.11-cu126-2.6.0` - 
`main-base-py3.11-cu124-2.6.0` -- `main-base-py3.11-cu124-2.5.1` ## Main @@ -78,10 +77,9 @@ Tags examples: - `main-py3.11-cu126-2.7.1` - `main-py3.11-cu126-2.6.0` - `main-py3.11-cu124-2.6.0` -- `main-py3.11-cu124-2.5.1` - `main-latest` - `main-20250303-py3.11-cu124-2.6.0` -- `main-20250303-py3.11-cu124-2.5.1` +- `main-20250303-py3.11-cu126-2.6.0` - `0.10.1` ## Cloud diff --git a/docs/installation.qmd b/docs/installation.qmd index c905e93cd..0a29aedb9 100644 --- a/docs/installation.qmd +++ b/docs/installation.qmd @@ -15,7 +15,7 @@ This guide covers all the ways you can install and set up Axolotl for your envir - NVIDIA GPU (Ampere architecture or newer for `bf16` and Flash Attention) or AMD GPU - Python ≥3.11 -- PyTorch ≥2.5.1 +- PyTorch ≥2.6.0 ## Installation Methods {#sec-installation-methods} diff --git a/src/axolotl/__init__.py b/src/axolotl/__init__.py index 314d22279..f31a40b4d 100644 --- a/src/axolotl/__init__.py +++ b/src/axolotl/__init__.py @@ -4,4 +4,4 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package -__version__ = "0.11.0.dev" +__version__ = "0.11.0" diff --git a/src/axolotl/integrations/kd/README.md b/src/axolotl/integrations/kd/README.md index 4b15ad31d..5e35cf3d7 100644 --- a/src/axolotl/integrations/kd/README.md +++ b/src/axolotl/integrations/kd/README.md @@ -11,7 +11,7 @@ kd_ce_alpha: 0.1 kd_alpha: 0.9 kd_temperature: 1.0 -torch_compile: True # torch>=2.5.1, recommended to reduce vram +torch_compile: True # torch>=2.6.0, recommended to reduce vram datasets: - path: ... 
diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index e590dbdaa..9dc04c7b4 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -35,6 +35,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "deepseek_v3", "glm", "glm4", + "smollm3", ] diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py index 94df7cde8..323e0877d 100644 --- a/src/axolotl/utils/schemas/config.py +++ b/src/axolotl/utils/schemas/config.py @@ -627,7 +627,7 @@ class AxolotlInputConfig( torch_compile: Literal["auto"] | bool | None = Field( default=None, json_schema_extra={ - "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.5.1" + "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.6.0" }, ) torch_compile_backend: str | None = Field( @@ -1083,9 +1083,9 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig): def check_min_torch_version(self): if self.env_capabilities and self.env_capabilities.torch_version: torch_version = self.env_capabilities.torch_version - if version.parse(torch_version) < version.parse("2.5.1"): + if version.parse(torch_version) < version.parse("2.6.0"): LOG.warning( - f"torch=={torch_version} may not be supported in future versions. Please consider upgrading to torch>=2.5.1." + f"torch=={torch_version} is not supported. Please upgrade to torch>=2.6.0."
) return self diff --git a/tests/patched/test_validation.py b/tests/patched/test_validation.py index 55e25daf7..677512d3d 100644 --- a/tests/patched/test_validation.py +++ b/tests/patched/test_validation.py @@ -692,7 +692,7 @@ class TestValidation(BaseValidation): "bf16": True, "capabilities": {"bf16": False}, "env_capabilities": { - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, } ) @@ -1202,7 +1202,7 @@ class TestValidation(BaseValidation): cfg, capabilities=capabilities, env_capabilities=env_capabilities ) - env_capabilities = {"torch_version": "2.5.1"} + env_capabilities = {"torch_version": "2.6.0"} capabilities = {"bf16": False} _ = validate_config( cfg, capabilities=capabilities, env_capabilities=env_capabilities @@ -1244,7 +1244,7 @@ class TestTorchCompileValidation(BaseValidation): | minimal_cfg ) - env_capabilities = {"torch_version": "2.5.1"} + env_capabilities = {"torch_version": "2.6.0"} capabilities = {"bf16": True} updated_cfg = validate_config( cfg, capabilities=capabilities, env_capabilities=env_capabilities diff --git a/tests/test_validation_dataset.py b/tests/test_validation_dataset.py index ba142f3bf..1a4c97314 100644 --- a/tests/test_validation_dataset.py +++ b/tests/test_validation_dataset.py @@ -73,7 +73,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -128,7 +128,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -184,7 +184,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, ) @@ -241,7 +241,7 @@ class TestValidationCheckDatasetConfig(BaseValidation): "compute_capability": "8.0", }, env_capabilities={ - "torch_version": "2.5.1", + "torch_version": "2.6.0", }, )