diff --git a/README.md b/README.md index 13518f2a8..285867215 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ ## 🎉 Latest Updates -- 2025/12: Axolotl now includes support for [Olmo3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/olmo3), [Trinity](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/trinity), and [Ministral3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/ministral). +- 2025/12: Axolotl now includes support for [Olmo3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/olmo3), [Trinity](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/trinity), and [Ministral3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/ministral3). - 2025/10: New model support has been added in Axolotl for: [Qwen3 Next](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/qwen3-next), [Qwen2.5-vl, Qwen3-vl](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen2_5-vl), [Qwen3, Qwen3MoE](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen3), [Granite 4](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/granite4), [HunYuan](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/hunyuan), [Magistral 2509](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/magistral#vision), [Apertus](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/apertus), and [Seed-OSS](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/seed-oss). - 2025/09: Axolotl now has text diffusion training. Read more [here](https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/diffusion). - 2025/08: QAT has been updated to include NVFP4 support. See [PR](https://github.com/axolotl-ai-cloud/axolotl/pull/3107). 
diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index 06705eb3d..77a4154e2 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -253,7 +253,6 @@ "source": [ "from axolotl.utils import set_pytorch_cuda_alloc_conf\n", "\n", - "# Set \"PYTORCH_CUDA_ALLOC_CONF\" env to save memory\n", "set_pytorch_cuda_alloc_conf()" ] }, diff --git a/requirements.txt b/requirements.txt index 21c94a3c2..0989325ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -72,4 +72,4 @@ axolotl-contribs-mit==0.0.5 # telemetry posthog==6.7.11 -mistral-common==1.8.5 +mistral-common==1.8.6
diff --git a/src/axolotl/utils/__init__.py b/src/axolotl/utils/__init__.py
index de67aadd0..335049158 100644
--- a/src/axolotl/utils/__init__.py
+++ b/src/axolotl/utils/__init__.py
@@ -41,14 +41,27 @@ def get_pytorch_version() -> tuple[int, int, int]:
 
 
 def set_pytorch_cuda_alloc_conf():
-    """Set up CUDA allocation config if using PyTorch >= 2.2"""
+    """Set default allocator options unless the user already configured them.
+
+    PyTorch >= 2.9 gets the defaults via ``PYTORCH_ALLOC_CONF``; PyTorch 2.2-2.8
+    via ``PYTORCH_CUDA_ALLOC_CONF``. Older versions are left untouched.
+    """
     torch_version = torch.__version__.split(".")
     torch_major, torch_minor = int(torch_version[0]), int(torch_version[1])
-    if torch_major == 2 and torch_minor >= 2:
-        if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None:
-            os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
-                "expandable_segments:True,roundup_power2_divisions:16"
-            )
+    config_value = "expandable_segments:True,roundup_power2_divisions:16"
+    version = (torch_major, torch_minor)
+    if version >= (2, 9):
+        # Stay in this branch even when nothing is written: falling through to
+        # the legacy branch would export PYTORCH_CUDA_ALLOC_CONF on >= 2.9.
+        # NOTE(review): the legacy variable is also treated as user config here,
+        # presumably still honored by PyTorch >= 2.9 as a deprecated alias.
+        if (
+            os.getenv("PYTORCH_ALLOC_CONF") is None
+            and os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None
+        ):
+            os.environ["PYTORCH_ALLOC_CONF"] = config_value
+    elif version >= (2, 2) and os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None:
+        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = config_value
 
 
 def set_misc_env():