diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a0e4d3081..6bb6a6b8f 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -31,7 +31,10 @@ PRs are **greatly welcome**! Please run below to setup env ```bash -pip3 install -r requirements-dev.txt -r requirements-tests.txt +# Install axolotl + dev and test dependencies from lockfile +export UV_TORCH_BACKEND=cu128 # or cu130 +uv sync --extra flash-attn --extra deepspeed --group dev --group test +source .venv/bin/activate pre-commit install # test diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 181fd9dc9..e89e27642 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,7 +6,7 @@ on: types: [opened, synchronize, reopened, ready_for_review] paths: - '**.py' - - 'requirements.txt' + - 'pyproject.toml' - '.github/workflows/*.yml' - "*.[q]md" - "examples/**/*.y[a]?ml" diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 2c5d76e4c..03da58f7e 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -3,17 +3,15 @@ name: docker-multigpu-tests-biweekly on: pull_request: paths: - - 'tests/e2e/multigpu/**.py' - - 'requirements.txt' - - 'setup.py' - - 'pyproject.toml' - - '.github/workflows/multi-gpu-e2e.yml' - - 'scripts/cutcrossentropy_install.py' - - 'src/axolotl/core/trainers/mixins/sequence_parallel.py' - - 'src/axolotl/utils/distributed.py' + - "tests/e2e/multigpu/**.py" + - "pyproject.toml" + - ".github/workflows/multi-gpu-e2e.yml" + - "scripts/cutcrossentropy_install.py" + - "src/axolotl/core/trainers/mixins/sequence_parallel.py" + - "src/axolotl/utils/distributed.py" workflow_dispatch: schedule: - - cron: '0 0 * * 1,4' # Runs at 00:00 UTC every monday & thursday + - cron: "0 0 * * 1,4" # Runs at 00:00 UTC every monday & thursday # Cancel jobs on the same ref if a new one is triggered concurrency: @@ -33,19 +31,19 @@ jobs: fail-fast: false matrix: include: -# - cuda: 129 -# cuda_version: 12.9.1 -# python_version: "3.12" -# pytorch: 2.9.1 -# axolotl_extras: "fbgemm-gpu" -# num_gpus: 2 -# dockerfile: "Dockerfile-uv.jinja" + # - cuda: 129 + # cuda_version: 12.9.1 + # python_version: "3.12" + # pytorch: 2.9.1 + # axolotl_extras: "fbgemm-gpu" + # num_gpus: 2 + # dockerfile: "Dockerfile-uv.jinja" - cuda: 130 cuda_version: 13.0.0 python_version: "3.11" pytorch: 2.9.1 axolotl_extras: -# axolotl_extras: fbgemm-gpu + # axolotl_extras: fbgemm-gpu num_gpus: 2 - cuda: 128 cuda_version: 12.8.1 @@ -53,7 +51,6 @@ jobs: pytorch: 2.10.0 axolotl_extras: "fbgemm-gpu" num_gpus: 2 - dockerfile: "Dockerfile-uv.jinja" runs-on: [self-hosted, modal] timeout-minutes: 120 steps: @@ -75,7 +72,7 @@ jobs: echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV - echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" >> $GITHUB_ENV - name: Run tests job on Modal env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 19dface73..c2fc1c9d8 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -8,6 +8,9 @@ on: permissions: {} +env: + UV_SYSTEM_PYTHON: "1" + jobs: setup_release: name: Create Release @@ -41,11 +44,15 @@ jobs: with: python-version: "3.11" + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install 
dependencies run: | - pip3 install wheel packaging==26.0 - pip3 install --no-build-isolation -e . - pip3 install -r requirements-dev.txt -r requirements-tests.txt + uv pip install wheel packaging + uv pip install --no-build-isolation -e . + uv pip install black mypy pre-commit types-requests quartodoc jupyter blobfile tiktoken \ + codecov codecov-cli pytest pytest-cov pytest-retry pytest-sugar pytest-xdist tbparse - name: Extract tag name id: tag diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 235aebcfa..1802b6305 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -2,15 +2,18 @@ name: Tests Nightly against upstream main on: workflow_dispatch: schedule: - - cron: '0 0 * * *' # Runs at 00:00 UTC every day + - cron: "0 0 * * *" # Runs at 00:00 UTC every day pull_request: types: [opened, synchronize, reopened, ready_for_review] paths: - - '.github/workflows/tests-nightly.yml' + - ".github/workflows/tests-nightly.yml" permissions: contents: read +env: + UV_SYSTEM_PYTHON: "1" + jobs: pre-commit: name: pre-commit @@ -20,7 +23,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.11" - cache: 'pip' # caching pip dependencies + cache: "pip" # caching pip dependencies - uses: pre-commit/action@v3.0.1 env: SKIP: no-commit-to-branch @@ -43,7 +46,7 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.12"] # TODO include py3.14 once https://github.com/mistralai/mistral-common/pull/194 is merged + python_version: ["3.12"] # TODO include py3.14 once https://github.com/mistralai/mistral-common/pull/194 is merged pytorch_version: ["2.9.1", "2.10.0"] timeout-minutes: 20 @@ -61,36 +64,34 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version }} - cache: 'pip' # caching pip dependencies - - name: upgrade pip - run: | - pip3 install --upgrade pip - pip3 install --upgrade packaging==26.0 setuptools==78.1.1 wheel + - name: Install uv + uses: astral-sh/setup-uv@v7 - name: Install PyTorch run: | - pip3 install torch==${{ matrix.pytorch_version }} torchvision - - - name: Update requirements.txt - run: | - sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt - sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt - sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt - sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt - sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt + uv pip install torch==${{ matrix.pytorch_version }} torchvision + uv pip freeze | grep -E "^(torch|torchvision)==" > /tmp/torch-pin.txt - name: Install dependencies run: | - pip3 show torch - pip3 install --no-build-isolation -U -e . - python scripts/unsloth_install.py | sh - python scripts/cutcrossentropy_install.py | sh - pip3 install -r requirements-dev.txt -r requirements-tests.txt + uv pip install --no-build-isolation -e . 
--override /tmp/torch-pin.txt + python scripts/cutcrossentropy_install.py --uv | sh + uv pip install black mypy pre-commit types-requests quartodoc jupyter blobfile tiktoken \ + codecov codecov-cli pytest pytest-cov pytest-retry pytest-sugar pytest-xdist tbparse + + - name: Override with nightly HF packages + run: | + uv pip install --no-deps \ + "transformers @ git+https://github.com/huggingface/transformers.git@main" \ + "peft @ git+https://github.com/huggingface/peft.git@main" \ + "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \ + "trl @ git+https://github.com/huggingface/trl.git@main" \ + "datasets @ git+https://github.com/huggingface/datasets.git@main" - name: Make sure PyTorch version wasn't clobbered run: | - python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__" + python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__, f'Expected torch ${{ matrix.pytorch_version }} but got {torch.__version__}'" - name: Ensure axolotl CLI was installed run: | @@ -102,9 +103,6 @@ jobs: pytest -v --durations=10 tests/patched/ pytest -v --durations=10 tests/cli/ - - name: cleanup pip cache - run: | - find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; docker-e2e-tests: if: github.repository_owner == 'axolotl-ai-cloud' @@ -136,7 +134,6 @@ jobs: pytorch: 2.9.1 num_gpus: 1 axolotl_extras: - dockerfile: "Dockerfile-uv.jinja" nightly_build: "true" steps: - name: Checkout @@ -157,7 +154,7 @@ jobs: echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV - echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" >> $GITHUB_ENV echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV - name: Run tests job on Modal env: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b1e9c718e..e21e60ab5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,21 +6,19 @@ on: branches: - "main" paths: - - '**.py' - - 'requirements.txt' - - '.github/workflows/*.yml' - - 'requirements-tests.txt' - - 'cicd/cicd.sh' - - 'cicd/Dockerfile.jinja' + - "**.py" + - "pyproject.toml" + - ".github/workflows/*.yml" + - "cicd/cicd.sh" + - "cicd/Dockerfile-uv.jinja" pull_request: - types: [opened, synchronize, reopened, ready_for_review] - paths: - - '**.py' - - 'requirements.txt' - - '.github/workflows/*.yml' - - 'requirements-tests.txt' - - 'cicd/cicd.sh' - - 'cicd/Dockerfile.jinja' + types: [opened, synchronize, reopened, ready_for_review] + paths: + - "**.py" + - "pyproject.toml" + - ".github/workflows/*.yml" + - "cicd/cicd.sh" + - "cicd/Dockerfile-uv.jinja" workflow_dispatch: # Cancel jobs on the same ref if a new one is triggered @@ -33,6 +31,7 @@ permissions: env: TRANSFORMERS_IS_CI: "yes" + UV_SYSTEM_PYTHON: "1" jobs: pre-commit: @@ -44,7 +43,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.11" - cache: 'pip' # caching pip dependencies + cache: "pip" # caching pip dependencies - uses: pre-commit/action@v3.0.1 env: SKIP: no-commit-to-branch @@ -94,32 +93,25 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version }} - cache: 'pip' # caching pip dependencies - - name: upgrade pip - run: | - pip3 install --upgrade pip - pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel + - name: Install uv + uses: astral-sh/setup-uv@v7 - 
name: Install PyTorch run: | - pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision + uv pip install torch==${{ matrix.pytorch_version }} torchvision + uv pip freeze | grep -E "^(torch|torchvision)==" > /tmp/torch-pin.txt - name: Install dependencies run: | - pip3 show torch - pip3 install --no-cache-dir --no-build-isolation -U -e . - python scripts/unsloth_install.py | sh - python scripts/cutcrossentropy_install.py | sh - pip3 install -r requirements-dev.txt -r requirements-tests.txt - - - name: cleanup pip cache - run: | - find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; + uv pip install --no-build-isolation -e . --override /tmp/torch-pin.txt + python scripts/cutcrossentropy_install.py --uv | sh + uv pip install black mypy pre-commit types-requests quartodoc jupyter blobfile tiktoken \ + codecov codecov-cli pytest pytest-cov pytest-retry pytest-sugar pytest-xdist tbparse - name: Make sure PyTorch version wasn't clobbered run: | - python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__" + python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__, f'Expected torch ${{ matrix.pytorch_version }} but got {torch.__version__}'" - name: Ensure axolotl CLI was installed run: | @@ -188,33 +180,27 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python_version }} - cache: 'pip' # caching pip dependencies - - name: upgrade pip - run: | - pip3 install --upgrade pip - pip3 install --upgrade packaging==26.0 setuptools==75.8.0 setuptools_scm build wheel psutil + - name: Install uv + uses: astral-sh/setup-uv@v7 - name: Install PyTorch run: | - pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision + uv pip install torch==${{ matrix.pytorch_version }} torchvision + uv pip freeze | grep -E "^(torch|torchvision)==" > /tmp/torch-pin.txt - name: Install dependencies run: | - pip3 show torch + uv pip install packaging setuptools_scm build wheel psutil python -m build --no-isolation --sdist - pip3 install --no-cache-dir --no-build-isolation dist/axolotl*.tar.gz - python scripts/unsloth_install.py | sh - python scripts/cutcrossentropy_install.py | sh - pip3 install -r requirements-dev.txt -r requirements-tests.txt - - - name: cleanup pip cache - run: | - find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \; + uv pip install --no-build-isolation dist/axolotl*.tar.gz --override /tmp/torch-pin.txt + python scripts/cutcrossentropy_install.py --uv | sh + uv pip install black mypy pre-commit types-requests quartodoc jupyter blobfile tiktoken \ + codecov codecov-cli pytest pytest-cov pytest-retry pytest-sugar pytest-xdist tbparse - name: Make sure PyTorch version wasn't clobbered run: | - python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__" + python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__, f'Expected torch ${{ matrix.pytorch_version }} but got {torch.__version__}'" - name: Ensure axolotl CLI was installed run: | @@ -291,7 +277,6 @@ jobs: pytorch: 2.9.1 num_gpus: 1 axolotl_extras: - dockerfile: "Dockerfile-uv.jinja" steps: - name: Checkout uses: actions/checkout@v4 @@ -312,7 +297,7 @@ jobs: echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV - echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" 
>> $GITHUB_ENV - name: Run tests job on Modal env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} @@ -374,7 +359,7 @@ jobs: echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV echo "GPU_TYPE=${{ matrix.gpu_type || 'L40S'}}" >> $GITHUB_ENV - echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV + echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile-uv.jinja'}}" >> $GITHUB_ENV - name: Run tests job on Modal env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/MANIFEST.in b/MANIFEST.in index 30cd07242..5cf08eabf 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,6 @@ -include requirements.txt include README.md include LICENSE -include src/setuptools_axolotl_dynamic_dependencies.py +include VERSION include src/axolotl/utils/chat_templates/templates/*.jinja include AGENTS.md recursive-include docs/agents *.md diff --git a/README.md b/README.md index 063ec8d3b..73e73d6ae 100644 --- a/README.md +++ b/README.md @@ -95,14 +95,11 @@ Features: ### Installation -#### Using uv (recommended) - ```bash -# install uv if you don't already have it installed +# install uv if you don't already have it installed (restart shell after) curl -LsSf https://astral.sh/uv/install.sh | sh -source $HOME/.local/bin/env -# CUDA 12.8.1 tends to have better package compatibility +# change depending on system export UV_TORCH_BACKEND=cu128 # create a new virtual environment @@ -112,23 +109,6 @@ source .venv/bin/activate uv pip install torch==2.10.0 torchvision uv pip install --no-build-isolation axolotl[deepspeed] -# recommended - install cut-cross-entropy -uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@main" - -# (optional) - prefetch flash-attn2 and causal-conv1d kernels -uv run --python 3.12 python -c "from kernels import get_kernel; get_kernel('kernels-community/flash-attn2'); get_kernel('kernels-community/causal-conv1d')" - -# Download example axolotl configs, deepspeed configs -axolotl fetch examples -axolotl fetch deepspeed_configs # OPTIONAL -``` - -#### Using pip - -```bash -pip3 install -U packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] - # Download example axolotl configs, deepspeed configs axolotl fetch examples axolotl fetch deepspeed_configs # OPTIONAL @@ -138,7 +118,7 @@ axolotl fetch deepspeed_configs # OPTIONAL Installing with Docker can be less error prone than installing in your own environment. ```bash -docker run --gpus '"all"' --rm -it axolotlai/axolotl:main-latest +docker run --gpus '"all"' --ipc=host --rm -it axolotlai/axolotl:main-latest ``` Other installation approaches are described [here](https://docs.axolotl.ai/docs/installation.html). 
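
Reviewer note: the README now funnels installation through uv with a single torch-backend knob. A minimal smoke test of the consolidated flow, built only from the commands above (assumes a CUDA 12.8 host; swap `UV_TORCH_BACKEND=cu130` to match your system):

```bash
# Sketch of the new README install path on a CUDA 12.8 host.
curl -LsSf https://astral.sh/uv/install.sh | sh   # install uv (restart shell after)
export UV_TORCH_BACKEND=cu128                     # or cu130, depending on system
uv venv --no-project --relocatable
source .venv/bin/activate
uv pip install torch==2.10.0 torchvision
uv pip install --no-build-isolation axolotl[deepspeed]
# confirm torch kept the expected CUDA build
python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"
axolotl fetch examples
axolotl fetch deepspeed_configs  # OPTIONAL
```
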
diff --git a/_quarto.yml b/_quarto.yml index 2916ef2ec..e8263a971 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -134,7 +134,6 @@ quartodoc: - monkeypatch.stablelm_attn_hijack_flash - monkeypatch.trainer_fsdp_optim - monkeypatch.transformers_fa_utils - - monkeypatch.unsloth_ - monkeypatch.data.batch_dataset_fetcher - monkeypatch.mixtral - monkeypatch.gradient_checkpointing.offload_cpu @@ -327,7 +326,6 @@ website: - section: "Advanced Features" contents: - docs/fsdp_qlora.qmd - - docs/unsloth.qmd - docs/torchao.qmd - docs/custom_integrations.qmd - docs/sequence_parallelism.qmd diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja index 857b94c6b..c24512ed3 100644 --- a/cicd/Dockerfile-uv.jinja +++ b/cicd/Dockerfile-uv.jinja @@ -22,15 +22,6 @@ WORKDIR /workspace/axolotl RUN git fetch origin +$GITHUB_REF && \ git checkout FETCH_HEAD -# If AXOLOTL_EXTRAS is set, append it in brackets -RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ - sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \ - sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \ - sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \ - sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \ - sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ - fi - RUN uv pip install packaging==26.0 setuptools==78.1.1 RUN uv pip install torchvision RUN uv pip uninstall causal_conv1d @@ -40,11 +31,21 @@ RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ fi -RUN python scripts/unsloth_install.py --uv | sh +# Override with nightly HF packages for nightly builds +RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ + uv pip install --no-deps \ + "transformers @ git+https://github.com/huggingface/transformers.git@main" \ + "peft @ git+https://github.com/huggingface/peft.git@main" \ + "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \ + "trl @ git+https://github.com/huggingface/trl.git@main" \ + "datasets @ git+https://github.com/huggingface/datasets.git@main"; \ + fi + RUN python scripts/cutcrossentropy_install.py --uv | sh # So we can test the Docker image -RUN uv pip install -r requirements-dev.txt -r requirements-tests.txt +RUN uv pip install black mypy pre-commit types-requests quartodoc jupyter blobfile tiktoken \ + codecov codecov-cli pytest pytest-cov pytest-retry pytest-sugar pytest-xdist tbparse # fix so that git fetch/pull from remote works RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \ diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja deleted file mode 100644 index 7344f2a2c..000000000 --- a/cicd/Dockerfile.jinja +++ /dev/null @@ -1,54 +0,0 @@ -FROM axolotlai/axolotl-base:{{ BASE_TAG }} - -ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" -ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}" -ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}" -ENV CUDA="{{ CUDA }}" -ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}" -ENV GITHUB_REF="{{ GITHUB_REF }}" -ENV GITHUB_SHA="{{ GITHUB_SHA }}" -ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}" -ENV HF_HOME="{{ HF_HOME }}" -ENV AXOLOTL_DATASET_NUM_PROC="8" - -RUN apt-get update && \ - apt-get install -y --allow-change-held-packages vim curl nano zstd libnccl2 libnccl-dev ibverbs-providers ibverbs-utils 
infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm - -WORKDIR /workspace - -RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git - -WORKDIR /workspace/axolotl - -RUN git fetch origin +$GITHUB_REF && \ - git checkout FETCH_HEAD - -# If AXOLOTL_EXTRAS is set, append it in brackets -RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ - sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \ - sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \ - sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \ - sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \ - sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \ - fi - -RUN pip install packaging==26.0 setuptools==78.1.1 psutil -RUN pip uninstall -y causal_conv1d -RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ - else \ - pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \ - fi - -RUN python scripts/unsloth_install.py | sh -RUN python scripts/cutcrossentropy_install.py | sh - -# So we can test the Docker image -RUN pip install -r requirements-dev.txt -r requirements-tests.txt - -# fix so that git fetch/pull from remote works -RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \ - git config --get remote.origin.fetch - -# helper for huggingface-login cli -RUN git config --global credential.helper store diff --git a/cicd/cicd.sh b/cicd/cicd.sh index a3f17472a..15a6f7ebf 100755 --- a/cicd/cicd.sh +++ b/cicd/cicd.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__" +python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__, f'Expected torch $PYTORCH_VERSION but got {torch.__version__}'" set -o pipefail for i in 1 2 3; do diff --git a/cicd/multigpu.py b/cicd/multigpu.py index ed022c851..5ee0bc49a 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -17,7 +17,7 @@ template_loader = jinja2.FileSystemLoader(searchpath=cicd_path) template_env = jinja2.Environment( loader=template_loader, autoescape=select_autoescape() ) -dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile.jinja") +dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile-uv.jinja") df_template = template_env.get_template(dockerfile) df_args = { diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py index 592b6b931..9bd8d990e 100644 --- a/cicd/single_gpu.py +++ b/cicd/single_gpu.py @@ -16,7 +16,7 @@ template_loader = jinja2.FileSystemLoader(searchpath=cicd_path) template_env = jinja2.Environment( loader=template_loader, autoescape=select_autoescape() ) -dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile.jinja") +dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile-uv.jinja") df_template = template_env.get_template(dockerfile) df_args = { diff --git a/docker/Dockerfile b/docker/Dockerfile index 5840c1f61..2bdb45b5c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -32,7 +32,7 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \ pip install --no-build-isolation -e .[$BASE_EXTRAS,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ else \ pip install --no-build-isolation -e .[$BASE_EXTRAS] $AXOLOTL_ARGS; \ - fi && \ python 
scripts/unsloth_install.py | sh && \ + fi && \ python scripts/cutcrossentropy_install.py | sh && \ pip install pytest && \ pip cache purge diff --git a/docker/Dockerfile-uv b/docker/Dockerfile-uv index 0142c0d2d..df058baa3 100644 --- a/docker/Dockerfile-uv +++ b/docker/Dockerfile-uv @@ -33,7 +33,6 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \ else \ uv pip install --no-build-isolation -e .[$BASE_EXTRAS] $AXOLOTL_ARGS; \ fi && \ - python scripts/unsloth_install.py --uv | sh && \ python scripts/cutcrossentropy_install.py --uv | sh && \ uv pip install pytest && \ uv cache clean diff --git a/docs/debugging.qmd b/docs/debugging.qmd index 36e39ef16..f3ca6ad9a 100644 --- a/docs/debugging.qmd +++ b/docs/debugging.qmd @@ -76,8 +76,9 @@ datasets: Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project: ```bash -pip3 install packaging -pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]' +export UV_TORCH_BACKEND=cu128 # or cu130 +uv sync --extra flash-attn --extra deepspeed --group dev --group test +source .venv/bin/activate ``` #### Remote Hosts @@ -208,17 +209,17 @@ cd axolotl Next, run the desired docker image and mount the current directory. Below is a docker command you can run to do this:[^2] ```bash -docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface axolotlai/axolotl:main-py3.10-cu118-2.0.1 +docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface axolotlai/axolotl-uv:main-latest ``` >[!Tip] > To understand which containers are available, see the [Docker section of the README](../README.md#docker) and the [DockerHub repo](https://hub.docker.com/r/axolotlai/axolotl/tags). For details of how the Docker containers are built, see axolotl's [Docker CI builds](../.github/workflows/main.yml). -You will now be in the container. Next, perform an editable install of Axolotl: +You will now be in the container. Next, install Axolotl with dev dependencies: ```bash -pip3 install packaging -pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]' +uv sync --extra flash-attn --extra deepspeed --group dev --group test +source .venv/bin/activate ``` ### Attach To Container diff --git a/docs/docker.qmd b/docs/docker.qmd index 5d146eac2..001cf19a7 100644 --- a/docs/docker.qmd +++ b/docs/docker.qmd @@ -6,23 +6,30 @@ format: toc-depth: 4 --- -This section describes the different Docker images that are released by AxolotlAI at [Docker Hub](https://hub.docker.com/u/axolotlai). +This section describes the different Docker images that are released by AxolotlAI at +[Docker Hub](https://hub.docker.com/u/axolotlai). ::: {.callout-important} -For Blackwell GPUs, please use the tags with PyTorch 2.7.1 and CUDA 12.8. +For Blackwell GPUs, please use the tags with PyTorch 2.9.1 and CUDA 12.8. +::: + +::: {.callout-tip} +Each image below is available in a **uv variant** that uses [uv](https://docs.astral.sh/uv/) with +a relocatable venv (`/workspace/axolotl-venv`) instead of Miniconda + pip. Append `-uv` to the image name +(e.g. 
`axolotlai/axolotl-base-uv`). Tags follow the same format. We recommend the uv images for new deployments. ::: ## Base -The base image is the most minimal image that can install Axolotl. It is based on the `nvidia/cuda` image. It includes python, torch, git, git-lfs, awscli, pydantic, and more. +The base image is the most minimal image that can install Axolotl. It is based on the `nvidia/cuda` image. +It includes python, torch, git, git-lfs, awscli, pydantic, and more. #### Image -``` -axolotlai/axolotl-base -``` - -Link: [Docker Hub](https://hub.docker.com/r/axolotlai/axolotl-base) +| Variant | Image | Docker Hub | +|---------|-------|------------| +| pip | `axolotlai/axolotl-base` | [Link](https://hub.docker.com/r/axolotlai/axolotl-base) | +| uv | `axolotlai/axolotl-base-uv` | [Link](https://hub.docker.com/r/axolotlai/axolotl-base-uv) | #### Tags format @@ -32,8 +39,10 @@ main-base-py{python_version}-cu{cuda_version}-{pytorch_version} Tags examples: -- `main-base-py3.11-cu128-2.8.0` - `main-base-py3.11-cu128-2.9.1` +- `main-base-py3.12-cu128-2.10.0` +- `main-base-py3.12-cu130-2.9.1` +- `main-base-py3.12-cu130-2.10.0` ## Main @@ -41,11 +50,10 @@ The main image is the image that is used to run Axolotl. It is based on the `axo #### Image -``` -axolotlai/axolotl -``` - -Link: [Docker Hub](https://hub.docker.com/r/axolotlai/axolotl) +| Variant | Image | Docker Hub | +|---------|-------|------------| +| pip | `axolotlai/axolotl` | [Link](https://hub.docker.com/r/axolotlai/axolotl) | +| uv | `axolotlai/axolotl-uv` | [Link](https://hub.docker.com/r/axolotlai/axolotl-uv) | #### Tags format {#sec-main-tags} @@ -53,7 +61,7 @@ Link: [Docker Hub](https://hub.docker.com/r/axolotlai/axolotl) # on push to main main-py{python_version}-cu{cuda_version}-{pytorch_version} -# latest main (currently torch 2.6.0, python 3.11, cuda 12.4) +# latest main (currently torch 2.9.1, python 3.11, cuda 12.8) main-latest # nightly build @@ -71,11 +79,12 @@ There may be some extra tags appended to the image, like `-vllm` which installs Tags examples: -- `main-py3.11-cu128-2.8.0` - `main-py3.11-cu128-2.9.1` +- `main-py3.12-cu128-2.10.0` +- `main-py3.12-cu130-2.9.1` +- `main-py3.12-cu130-2.10.0` - `main-latest` -- `main-20250303-py3.11-cu124-2.6.0` -- `main-20250303-py3.11-cu126-2.6.0` +- `main-20260315-py3.11-cu128-2.9.1` - `0.12.0` ## Cloud @@ -90,11 +99,10 @@ Jupyter lab is run by default. Set `JUPYTER_DISABLE=1` in the environment variab #### Image -``` -axolotlai/axolotl-cloud -``` - -Link: [Docker Hub](https://hub.docker.com/r/axolotlai/axolotl-cloud) +| Variant | Image | Docker Hub | +|---------|-------|------------| +| pip | `axolotlai/axolotl-cloud` | [Link](https://hub.docker.com/r/axolotlai/axolotl-cloud) | +| uv | `axolotlai/axolotl-cloud-uv` | [Link](https://hub.docker.com/r/axolotlai/axolotl-cloud-uv) | #### Tags format diff --git a/docs/installation.qmd b/docs/installation.qmd index 5df8f87e8..9d1d0d4a1 100644 --- a/docs/installation.qmd +++ b/docs/installation.qmd @@ -15,64 +15,30 @@ This guide covers all the ways you can install and set up Axolotl for your envir - NVIDIA GPU (Ampere architecture or newer for `bf16` and Flash Attention) or AMD GPU - Python ≥3.11 -- PyTorch ≥2.6.0 +- PyTorch ≥2.9.0 -## Installation Methods {#sec-installation-methods} - -::: {.callout-important} -Please make sure to have Pytorch installed before installing Axolotl in your local environment. 
- -Follow the instructions at: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) -::: +## Installation {#sec-installation} ::: {.callout-important} For Blackwell GPUs, please use Pytorch 2.9.1 and CUDA 12.8. ::: -### PyPI Installation (Recommended) {#sec-pypi} +### Quick Install {#sec-uv} -```{.bash} -pip3 install -U packaging setuptools wheel ninja -pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] -``` +Axolotl uses [uv](https://docs.astral.sh/uv/) as its package manager. uv is a fast, reliable Python package installer and resolver built in Rust. -We use `--no-build-isolation` in order to detect the installed PyTorch version (if -installed) in order not to clobber it, and so that we set the correct version of -dependencies that are specific to the PyTorch version or other installed -co-dependencies. - -### uv Installation {#sec-uv} - -uv is a fast, reliable Python package installer and resolver built in Rust. It offers significant performance improvements over pip and provides better dependency resolution, making it an excellent choice for complex environments. - -Install uv if not already installed +Install uv if not already installed: ```{.bash} curl -LsSf https://astral.sh/uv/install.sh | sh source $HOME/.local/bin/env ``` -Choose your CUDA version to use with PyTorch; e.g. `cu124`, `cu126`, `cu128`, -then create the venv and activate +Choose your CUDA version (e.g. `cu128`, `cu130`), create a venv, and install: ```{.bash} -export UV_TORCH_BACKEND=cu126 +export UV_TORCH_BACKEND=cu128 # or cu130 uv venv --no-project --relocatable source .venv/bin/activate -``` - -Install PyTorch -- PyTorch 2.6.0 recommended -```{.bash} -uv pip install packaging setuptools wheel -uv pip install torch==2.6.0 -uv pip install awscli pydantic -``` - -Install axolotl from PyPi -```{.bash} -uv pip install --no-build-isolation axolotl[deepspeed,flash-attn] - -# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO -uv pip install --no-build-isolation axolotl[deepspeed,flash-attn,vllm] +uv pip install --no-build-isolation axolotl[flash-attn,deepspeed] ``` ### Edge/Development Build {#sec-edge-build} @@ -82,14 +48,17 @@ For the latest features between releases: ```{.bash} git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install -U packaging setuptools wheel ninja -pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]' +export UV_TORCH_BACKEND=cu128 # or cu130 +uv sync --extra flash-attn --extra deepspeed +source .venv/bin/activate ``` +`uv sync` creates a `.venv`, installs exact pinned versions from `uv.lock`, and sets up an editable install automatically. + ### Docker {#sec-docker} ```{.bash} -docker run --gpus '"all"' --rm -it axolotlai/axolotl:main-latest +docker run --gpus '"all"' --rm -it --ipc=host axolotlai/axolotl-uv:main-latest ``` For development with Docker: @@ -106,12 +75,12 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it \ --ulimit memlock=-1 --ulimit stack=67108864 \ --mount type=bind,src="${PWD}",target=/workspace/axolotl \ -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \ - axolotlai/axolotl:main-latest + axolotlai/axolotl-uv:main-latest ``` ::: ::: {.callout-important} -For Blackwell GPUs, please use `axolotlai/axolotl:main-py3.11-cu128-2.9.1` or the cloud variant `axolotlai/axolotl-cloud:main-py3.11-cu128-2.9.1`. 
+For Blackwell GPUs, please use `axolotlai/axolotl-uv:main-py3.11-cu128-2.9.1` or the cloud variant `axolotlai/axolotl-cloud-uv:main-py3.11-cu128-2.9.1`. ::: Please refer to the [Docker documentation](docker.qmd) for more information on the different Docker images that are available. @@ -122,7 +91,7 @@ Please refer to the [Docker documentation](docker.qmd) for more information on t For providers supporting Docker: -- Use `axolotlai/axolotl-cloud:main-latest` +- Use `axolotlai/axolotl-cloud-uv:main-latest` - Available on: - [RunPod](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz) - [Vast.ai](https://cloud.vast.ai?ref_id=62897&template_id=bdd4a49fa8bce926defc99471864cace&utm_source=axolotl&utm_medium=partner&utm_campaign=template_launch_july2025&utm_content=docs_link) @@ -141,7 +110,7 @@ For providers supporting Docker: ### macOS {#sec-macos} ```{.bash} -pip3 install --no-build-isolation -e '.' +uv pip install --no-build-isolation -e '.' ``` See @sec-troubleshooting for Mac-specific issues. @@ -152,21 +121,44 @@ See @sec-troubleshooting for Mac-specific issues. We recommend using WSL2 (Windows Subsystem for Linux) or Docker. ::: -## Environment Managers {#sec-env-managers} +## Migrating from pip to uv {#sec-migrating} -### Conda/Pip venv {#sec-conda} +If you have an existing pip-based Axolotl installation, you can migrate to uv: -1. Install Python ≥3.11 -2. Install PyTorch: https://pytorch.org/get-started/locally/ -3. Install Axolotl: - ```{.bash} - pip3 install -U packaging setuptools wheel ninja - pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]' - ``` -4. (Optional) Login to Hugging Face: - ```{.bash} - hf auth login - ``` +```{.bash} +# Install uv +curl -LsSf https://astral.sh/uv/install.sh | sh +source $HOME/.local/bin/env + +# Create a fresh venv (recommended for a clean start) +export UV_TORCH_BACKEND=cu128 # or cu130 +uv venv --no-project --relocatable +source .venv/bin/activate + +# Reinstall axolotl +uv pip install --no-build-isolation axolotl[flash-attn,deepspeed] +``` + +## Using pip (Alternative) {#sec-pip} + +If you are unable to install uv, you can still use pip directly. + +::: {.callout-important} +Please make sure to have PyTorch installed before installing Axolotl with pip. + +Follow the instructions at: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/) +::: + +```{.bash} +pip3 install -U packaging setuptools wheel ninja +pip3 install --no-build-isolation axolotl[flash-attn,deepspeed] +``` + +For editable/development installs: +```{.bash} +pip3 install -U packaging setuptools wheel ninja +pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]' +``` ## Troubleshooting {#sec-troubleshooting} diff --git a/docs/unsloth.qmd b/docs/unsloth.qmd deleted file mode 100644 index fd87f7bde..000000000 --- a/docs/unsloth.qmd +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: "Unsloth" -description: "Hyper-optimized QLoRA finetuning for single GPUs" ---- - -### Overview - -Unsloth provides hand-written optimized kernels for LLM finetuning that slightly improve speed and VRAM over -standard industry baselines. - -::: {.callout-important} -Due to breaking changes in transformers `v4.48.0`, users will need to downgrade to `<=v4.47.1` to use this patch. - -This will later be deprecated in favor of [LoRA Optimizations](lora_optims.qmd). -::: - - -### Installation - -The following will install the correct unsloth and extras from source. 
- -```bash -python scripts/unsloth_install.py | sh -``` - -### Usage - -Axolotl exposes a few configuration options to try out unsloth and get most of the performance gains. - -Our unsloth integration is currently limited to the following model architectures: - - llama - -These options are specific to LoRA finetuning and cannot be used for multi-GPU finetuning -```yaml -unsloth_lora_mlp: true -unsloth_lora_qkv: true -unsloth_lora_o: true -``` - -These options are composable and can be used with multi-gpu finetuning -```yaml -unsloth_cross_entropy_loss: true -unsloth_rms_norm: true -unsloth_rope: true -``` - -### Limitations - -- Single GPU only; e.g. no multi-gpu support -- No deepspeed or FSDP support (requires multi-gpu) -- LoRA + QLoRA support only. No full fine tunes or fp8 support. -- Limited model architecture support. Llama, Phi, Gemma, Mistral only -- No MoE support. diff --git a/examples/LiquidAI/README.md b/examples/LiquidAI/README.md index 8a18d9eb1..0a08692d7 100644 --- a/examples/LiquidAI/README.md +++ b/examples/LiquidAI/README.md @@ -15,8 +15,7 @@ Thanks to the team at LiquidAI for giving us early access to prepare for these r Here is an example of how to install from pip: ```bash # Ensure you have a compatible version of Pytorch installed - pip3 install packaging setuptools wheel ninja - pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' + uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. Run one of the finetuning examples below. @@ -35,7 +34,7 @@ Thanks to the team at LiquidAI for giving us early access to prepare for these r **LFM2-MoE** ```bash - pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6 + uv pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6 # LoRA SFT (1x48GB @ 16.2GiB) axolotl train examples/LiquidAI/lfm2-8b-a1b-lora.yaml @@ -45,7 +44,7 @@ Thanks to the team at LiquidAI for giving us early access to prepare for these r - **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it: ```bash - pip uninstall -y causal-conv1d + uv pip uninstall causal-conv1d ``` - **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html). diff --git a/examples/apertus/README.md b/examples/apertus/README.md index 1cb4d413c..1280e430a 100644 --- a/examples/apertus/README.md +++ b/examples/apertus/README.md @@ -15,8 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation -e '.[flash-attn]' +uv pip install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy python scripts/cutcrossentropy_install.py | sh @@ -31,7 +30,7 @@ python scripts/cutcrossentropy_install.py | sh # For those using our Docker image, use the below path. 
export CUDA_HOME=/usr/local/cuda -pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps +uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps ``` For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues) @@ -67,7 +66,7 @@ If those didn't help, please try the below solutions: 1. Pass env for CMAKE and try install again: ```bash - Python_EXECUTABLE=$(which python) pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps + Python_EXECUTABLE=$(which python) uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps ``` 2. Git clone the repo and manually hardcode python path: @@ -92,7 +91,7 @@ If those didn't help, please try the below solutions: ``` ```bash - pip3 install . --no-build-isolation --no-deps + uv pip install . --no-build-isolation --no-deps ``` ## Optimization Guides diff --git a/examples/arcee/README.md b/examples/arcee/README.md index ad554532c..deaea676a 100644 --- a/examples/arcee/README.md +++ b/examples/arcee/README.md @@ -17,8 +17,7 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation -e '.[flash-attn]' +uv pip install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy python scripts/cutcrossentropy_install.py | sh diff --git a/examples/devstral/README.md b/examples/devstral/README.md index 5a0145f10..2be8f6292 100644 --- a/examples/devstral/README.md +++ b/examples/devstral/README.md @@ -16,8 +16,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' +uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage diff --git a/examples/gemma3n/README.md b/examples/gemma3n/README.md index 4808ed81b..1ecc96cbc 100644 --- a/examples/gemma3n/README.md +++ b/examples/gemma3n/README.md @@ -10,17 +10,16 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' +uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. In addition to Axolotl's requirements, Gemma-3n requires: ```bash -pip3 install timm==1.0.17 +uv pip install timm==1.0.17 # for loading audio data -pip3 install librosa==0.11.0 +uv pip install librosa==0.11.0 ``` 3. 
Download sample dataset files diff --git a/examples/gpt-oss/README.md b/examples/gpt-oss/README.md index 8c407540e..0e5eac500 100644 --- a/examples/gpt-oss/README.md +++ b/examples/gpt-oss/README.md @@ -14,8 +14,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' +uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b)) @@ -87,7 +86,7 @@ for more information about using a special vllm-openai docker image for inferenc Optionally, vLLM can be installed from nightly: ```bash -pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly +uv pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly ``` and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment): ```bash diff --git a/examples/granite4/README.md b/examples/granite4/README.md index 049539405..ceb599c1c 100644 --- a/examples/granite4/README.md +++ b/examples/granite4/README.md @@ -15,8 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation -e '.[flash-attn]' +uv pip install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy python scripts/cutcrossentropy_install.py | sh diff --git a/examples/hunyuan/README.md b/examples/hunyuan/README.md index 59e9a28c7..3071a0a61 100644 --- a/examples/hunyuan/README.md +++ b/examples/hunyuan/README.md @@ -13,8 +13,7 @@ Tencent released a family of opensource models called HunYuan with varying param git clone https://github.com/axolotl-ai-cloud/axolotl.git cd axolotl -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation -e '.[flash-attn]' +uv pip install --no-build-isolation -e '.[flash-attn]' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy python scripts/cutcrossentropy_install.py | sh diff --git a/examples/internvl3_5/README.md b/examples/internvl3_5/README.md index d2584bb80..7424385bb 100644 --- a/examples/internvl3_5/README.md +++ b/examples/internvl3_5/README.md @@ -11,7 +11,7 @@ This guide shows how to fine-tune it with Axolotl. 2. Install `timm` for vision model support: ```bash - pip install timm==1.0.19 + uv pip install timm==1.0.19 ``` 3. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage. diff --git a/examples/magistral/README.md b/examples/magistral/README.md index 2e162df6b..172a40b2c 100644 --- a/examples/magistral/README.md +++ b/examples/magistral/README.md @@ -14,8 +14,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for these ```bash # Ensure you have Pytorch installed (Pytorch 2.7.0 min) -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' +uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. 
Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage diff --git a/examples/magistral/vision/README.md b/examples/magistral/vision/README.md index 72a8a2215..8babc904a 100644 --- a/examples/magistral/vision/README.md +++ b/examples/magistral/vision/README.md @@ -12,7 +12,7 @@ Before starting, ensure you have: 1. Install the required vision lib: ```bash - pip install 'mistral-common[opencv]==1.8.5' + uv pip install 'mistral-common[opencv]==1.8.5' ``` 2. Download the example dataset image: diff --git a/examples/ministral3/README.md b/examples/ministral3/README.md index 6ed7efda5..72f21b746 100644 --- a/examples/ministral3/README.md +++ b/examples/ministral3/README.md @@ -23,7 +23,7 @@ Note: This is still experimental given it is based on transformers v5 RC. git checkout transformers-v5 # Install packages for transformers v5 - pip install -e . + uv pip install -e . ``` 4. Run the fine-tuning: diff --git a/examples/ministral3/vision/README.md b/examples/ministral3/vision/README.md index 8193573eb..cc9a0f38b 100644 --- a/examples/ministral3/vision/README.md +++ b/examples/ministral3/vision/README.md @@ -12,7 +12,7 @@ Before starting, ensure you have: 1. Install the required vision lib: ```bash - pip install 'mistral-common[opencv]==1.8.6' + uv pip install 'mistral-common[opencv]==1.8.6' ``` 2. Download the example dataset image: diff --git a/examples/mistral-small/README.md b/examples/mistral-small/README.md index 7f7ec91e6..c5120aab7 100644 --- a/examples/mistral-small/README.md +++ b/examples/mistral-small/README.md @@ -12,7 +12,7 @@ Before starting, ensure you have: 1. Install the required vision lib: ```bash - pip install 'mistral-common[opencv]==1.8.5' + uv pip install 'mistral-common[opencv]==1.8.5' ``` 2. Download the example dataset image: diff --git a/examples/mistral4/README.md b/examples/mistral4/README.md index 3151069ba..ccbb03b21 100644 --- a/examples/mistral4/README.md +++ b/examples/mistral4/README.md @@ -13,7 +13,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r 3. Install transformers from main ```bash - pip install git+https://github.com/huggingface/transformers.git + uv pip install git+https://github.com/huggingface/transformers.git ``` 4. Run one of the example configs: diff --git a/examples/qwen3-next/README.md b/examples/qwen3-next/README.md index df87ca725..05b512d29 100644 --- a/examples/qwen3-next/README.md +++ b/examples/qwen3-next/README.md @@ -12,7 +12,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations 3. Install FLA for improved performance ```bash -pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.4.1 +uv pip uninstall causal-conv1d && uv pip install flash-linear-attention==0.4.1 ``` 4. Run the finetuning example: diff --git a/examples/qwen3.5/README.md b/examples/qwen3.5/README.md index b5089d727..22e9e360d 100644 --- a/examples/qwen3.5/README.md +++ b/examples/qwen3.5/README.md @@ -10,7 +10,7 @@ 3. Install FLA for sample packing support with the Gated DeltaNet linear attention layers: ```bash - pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.4.1 + uv pip uninstall causal-conv1d && uv pip install flash-linear-attention==0.4.1 ``` > FLA is required when `sample_packing: true`. Without it, training raises a `RuntimeError` on packed sequences. Vision configs use `sample_packing: false` so FLA is optional there. 
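
Reviewer note: both Qwen READMEs above replace `causal-conv1d` with FLA for the Gated DeltaNet linear attention layers. A quick post-install sanity check is sketched below; the `fla` and `causal_conv1d` import names are assumptions based on the package names, not something these READMEs spell out:

```bash
uv pip uninstall causal-conv1d && uv pip install flash-linear-attention==0.4.1
python - <<'PY'
import importlib.util
# 'fla' is assumed to be the module shipped by flash-linear-attention;
# 'causal_conv1d' should no longer resolve after the uninstall above.
assert importlib.util.find_spec("fla"), "flash-linear-attention not importable"
assert importlib.util.find_spec("causal_conv1d") is None, "causal-conv1d still installed"
print("FLA swap OK")
PY
```

With `sample_packing: true` this check is worth running before a long job, since per the README a missing FLA only surfaces as a `RuntimeError` once packed sequences reach the model.
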
diff --git a/examples/seed-oss/README.md b/examples/seed-oss/README.md index aeb8635e3..796ef118d 100644 --- a/examples/seed-oss/README.md +++ b/examples/seed-oss/README.md @@ -11,8 +11,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations Here is an example of how to install from pip: ```bash # Ensure you have a compatible version of Pytorch installed - pip3 install packaging setuptools wheel ninja - pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' + uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' # Install Cut Cross Entropy python scripts/cutcrossentropy_install.py | sh diff --git a/examples/smolvlm2/README.md b/examples/smolvlm2/README.md index 74c1a1c0f..da83e612c 100644 --- a/examples/smolvlm2/README.md +++ b/examples/smolvlm2/README.md @@ -13,14 +13,13 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl. Here is an example of how to install from pip: ```bash # Ensure you have a compatible version of Pytorch installed - pip3 install packaging setuptools wheel ninja - pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' + uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. Install an extra dependency: ```bash - pip3 install num2words==0.5.14 + uv pip install num2words==0.5.14 ``` 3. Run the finetuning example: diff --git a/examples/voxtral/README.md b/examples/voxtral/README.md index 2d3cad4e9..ed5cc6422 100644 --- a/examples/voxtral/README.md +++ b/examples/voxtral/README.md @@ -12,16 +12,15 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r ```bash # Ensure you have Pytorch installed (Pytorch 2.6.0 min) -pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja -pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' +uv pip install --no-build-isolation 'axolotl[flash-attn]>=0.12.0' ``` 2. Please install the below. 
```bash # audio -pip3 install librosa==0.11.0 -pip3 install 'mistral_common[audio]==1.8.3' +uv pip install librosa==0.11.0 +uv pip install 'mistral_common[audio]==1.8.3' # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy python scripts/cutcrossentropy_install.py | sh diff --git a/pyproject.toml b/pyproject.toml index 9cee4a520..d028b394d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,165 @@ [build-system] -requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==26.0"] +requires = ["setuptools>=64", "wheel", "setuptools_scm>=8"] build-backend = "setuptools.build_meta" [project] name = "axolotl" -dynamic = ["version", "dependencies", "optional-dependencies"] +dynamic = ["version"] description = "LLM Trainer" readme = "README.md" requires-python = ">=3.10" # license = "Apache-2.0" +dependencies = [ + # Core ML stack + "torch>=2.6.0", + "packaging==26.0", + "huggingface_hub>=1.1.7", + "peft>=0.19.1,<0.20.0", + "tokenizers>=0.22.1", + "transformers==5.5.4", + "accelerate==1.13.0", + "datasets>=4.8.4,<4.9.0", + "trl==1.1.0", + "hf_xet==1.4.3", + "kernels==0.13.0", + "trackio>=0.16.1", + "typing-extensions>=4.15.0", + "optimum==1.16.2", + "hf_transfer", + "sentencepiece", + "gradio>=6.2.0,<7.0", + "modal==1.3.0.post1", + "pydantic>=2.10.6", + "addict", + "fire", + "PyYAML>=6.0", + "requests", + "wandb", + "einops", + "colorama", + "numba>=0.61.2", + "numpy>=2.2.6", + + # Evaluation & metrics + "evaluate==0.4.1", + "scipy", + "nvidia-ml-py==12.560.30", + "art", + "tensorboard", + "python-dotenv==1.0.1", + + # Remote filesystems + "s3fs>=2024.5.0", + "gcsfs>=2025.3.0", + "adlfs>=2024.5.0", + "ocifs==1.3.2", + + "zstandard==0.22.0", + "fastcore", + + # lm eval harness + "lm_eval==0.4.11", + "langdetect==1.0.9", + "immutabledict==4.2.0", + "antlr4-python3-runtime==4.13.2", + + "schedulefree==1.4.1", + "openenv-core==0.1.0", + + # Axolotl contribs + "axolotl-contribs-lgpl==0.0.7", + "axolotl-contribs-mit==0.0.6", + + # Telemetry + "posthog==6.7.11", + + "mistral-common==1.11.0", + + # Platform-specific (Linux only) + "bitsandbytes==0.49.1 ; sys_platform != 'darwin'", + "triton>=3.4.0 ; sys_platform != 'darwin'", + "xformers>=0.0.23.post1 ; sys_platform != 'darwin'", + "liger-kernel==0.7.0 ; sys_platform != 'darwin'", + "torchao==0.17.0 ; sys_platform != 'darwin' and platform_machine != 'aarch64'", + + # Architecture-specific + "fla-core==0.4.1 ; platform_machine != 'aarch64'", + "flash-linear-attention==0.4.1 ; platform_machine != 'aarch64'", +] + +[project.optional-dependencies] +flash-attn = ["flash-attn==2.8.3"] +ring-flash-attn = [ + "flash-attn==2.8.3", + "ring-flash-attn>=0.1.7", +] +deepspeed = [ + "deepspeed>=0.18.6,<0.19.0", + "deepspeed-kernels", +] +mamba-ssm = [ + "mamba-ssm==1.2.0.post1", + "causal_conv1d", +] +auto-gptq = [ + "auto-gptq==0.5.1", +] +mlflow = [ + "mlflow", +] +galore = [ + "galore_torch", +] +apollo = [ + "apollo-torch", +] +optimizers = [ + "galore_torch", + "apollo-torch", + "lomo-optim==0.1.1", + "torch-optimi==0.2.1", + "came_pytorch==0.1.3", +] +ray = [ + "ray[train]>=2.52.1", +] +vllm = [ + "vllm>=0.15.0", +] +llmcompressor = [ + "llmcompressor>=0.10.0", +] +fbgemm-gpu = ["fbgemm-gpu-genai>=1.3.0"] +opentelemetry = [ + "opentelemetry-api", + "opentelemetry-sdk", + "opentelemetry-exporter-prometheus", + "prometheus-client", +] + +[dependency-groups] +dev = [ + "black", + "mypy", + "pre-commit", + "types-requests", + "quartodoc", + "jupyter", + "blobfile", + "tiktoken", +] +test = [ + "codecov", + 
"codecov-cli", + "pytest", + "pytest-cov", + "pytest-retry", + "pytest-sugar", + "pytest-xdist", + "tbparse", +] + [project.scripts] axolotl = "axolotl.cli.main:main" @@ -18,18 +168,15 @@ Homepage = "https://axolotl.ai/" Documentation = "https://docs.axolotl.ai/" Repository = "https://github.com/axolotl-ai-cloud/axolotl.git" -[tool.setuptools_scm] - [tool.setuptools] -py-modules = ["setuptools_axolotl_dynamic_dependencies"] include-package-data = true +[tool.setuptools.packages.find] +where = ["src"] + [tool.setuptools.dynamic] version = { file = "VERSION" } -[tool.setuptools.cmdclass] -build_py = "setuptools_axolotl_dynamic_dependencies.BuildPyCommand" - [tool.ruff] line-length = 88 target-version = "py310" @@ -67,5 +214,43 @@ markers = [ "slow: marks tests as slow", ] +# UV specific configuration +[tool.uv] +prerelease = "allow" +conflicts = [ + [ + { package = "axolotl" }, + { extra = "vllm" }, + ], + [ + { package = "axolotl" }, + { extra = "flash-attn" }, + ], + [ + { package = "axolotl" }, + { extra = "ring-flash-attn" }, + ], + [ + { package = "axolotl" }, + { extra = "mamba-ssm" }, + ], + [ + { package = "axolotl" }, + { extra = "auto-gptq" }, + ], + [ + { package = "axolotl" }, + { extra = "fbgemm-gpu" }, + ], + [ + { package = "axolotl" }, + { extra = "llmcompressor" }, + ], +] + [tool.uv.extra-build-dependencies] -axolotl = ["huggingface_hub"] +mamba-ssm = [{ requirement = "torch", match-runtime = true }] +causal-conv1d = [{ requirement = "torch", match-runtime = true }] +flash-attn = [{ requirement = "torch", match-runtime = true }] +deepspeed = [{ requirement = "torch", match-runtime = true }] +auto-gptq = [{ requirement = "torch", match-runtime = true }] diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 5c42d96d4..000000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -black -mypy -pre-commit -types-requests -quartodoc -jupyter -blobfile -tiktoken diff --git a/requirements-tests.txt b/requirements-tests.txt deleted file mode 100644 index 93b2ceee5..000000000 --- a/requirements-tests.txt +++ /dev/null @@ -1,8 +0,0 @@ -codecov -codecov-cli -pytest -pytest-cov -pytest-retry -pytest-sugar -pytest-xdist -tbparse diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index bb3fc8daa..000000000 --- a/requirements.txt +++ /dev/null @@ -1,78 +0,0 @@ ---extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ - -# START section of dependencies that don't install on Darwin/MacOS -bitsandbytes==0.49.1 -triton>=3.4.0 -mamba-ssm==1.2.0.post1 -xformers>=0.0.23.post1 -liger-kernel==0.7.0 -# END section - -packaging==26.0 -huggingface_hub>=1.1.7 -peft>=0.19.0,<0.20.0 -tokenizers>=0.22.1 -transformers==5.5.4 -accelerate==1.13.0 -datasets>=4.8.4,<4.9.0 -deepspeed>=0.18.6,<0.19.0 -trl==1.1.0 -hf_xet==1.4.3 -kernels==0.13.0 - -fla-core==0.4.1 -flash-linear-attention==0.4.1 - -trackio>=0.16.1 -typing-extensions>=4.15.0 - -optimum==1.16.2 -hf_transfer -sentencepiece -gradio>=6.2.0,<7.0 - -modal==1.3.0.post1 -pydantic>=2.10.6 -addict -fire -PyYAML>=6.0 -requests -wandb -einops -colorama -numba>=0.61.2 -numpy>=2.2.6 - -# qlora things -evaluate==0.4.1 -scipy -nvidia-ml-py==12.560.30 -art -tensorboard -python-dotenv==1.0.1 - -# remote filesystems -s3fs>=2024.5.0 -gcsfs>=2025.3.0 -adlfs>=2024.5.0 -ocifs==1.3.2 - -zstandard==0.22.0 -fastcore - -# lm eval harness -lm_eval==0.4.11 -langdetect==1.0.9 -immutabledict==4.2.0 -antlr4-python3-runtime==4.13.2 - -torchao==0.17.0 -openenv-core==0.1.0 -schedulefree==1.4.1 - 
-axolotl-contribs-lgpl==0.0.7 -axolotl-contribs-mit==0.0.6 -# telemetry -posthog==6.7.11 - -mistral-common==1.11.0 diff --git a/scripts/unsloth_install.py b/scripts/unsloth_install.py deleted file mode 100644 index c0e5bbe70..000000000 --- a/scripts/unsloth_install.py +++ /dev/null @@ -1,40 +0,0 @@ -# noqa -import sys - -try: - import torch -except ImportError as error: - raise ImportError("Install torch via `pip install torch`") from error -from packaging.version import Version as V - -use_uv = "--uv" in sys.argv[1:] - -v = V(torch.__version__) -cuda = str(torch.version.cuda) -try: - is_ampere = torch.cuda.get_device_capability()[0] >= 8 -except RuntimeError: - is_ampere = False -if cuda != "12.1" and cuda != "11.8" and cuda != "12.4": - raise RuntimeError(f"CUDA = {cuda} not supported!") -if v <= V("2.1.0"): - raise RuntimeError(f"Torch = {v} too old!") -elif v <= V("2.1.1"): - x = "cu{}{}-torch211" -elif v <= V("2.1.2"): - x = "cu{}{}-torch212" -elif v < V("2.3.0"): - x = "cu{}{}-torch220" -elif v < V("2.4.0"): - x = "cu{}{}-torch230" -elif v < V("2.5.0"): - x = "cu{}{}-torch240" -elif v < V("2.6.0"): - x = "cu{}{}-torch250" -else: - raise RuntimeError(f"Torch = {v} too new!") -x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "") -uv_prefix = "uv " if use_uv else "" -print( - f'{uv_prefix}pip install unsloth-zoo==2024.12.1 && {uv_prefix}pip install --no-deps "unsloth[{x}]==2024.12.4"' -) diff --git a/setup.py b/setup.py deleted file mode 100644 index 99879597b..000000000 --- a/setup.py +++ /dev/null @@ -1,230 +0,0 @@ -"""setup.py for axolotl""" - -import os -import platform -import re -from importlib.metadata import PackageNotFoundError, version -from pathlib import Path - -from setuptools import find_packages, setup - - -def parse_requirements(extras_require_map): - _install_requires = [] - _dependency_links = [] - with open("./requirements.txt", encoding="utf-8") as requirements_file: - lines = [r.strip() for r in requirements_file.readlines()] - for line in lines: - is_extras = "deepspeed" in line or "mamba-ssm" in line - if line.startswith("--extra-index-url"): - # Handle custom index URLs - _, url = line.split() - _dependency_links.append(url) - elif not is_extras and line and line[0] != "#": - # Handle standard packages - _install_requires.append(line) - try: - xformers_version = [req for req in _install_requires if "xformers" in req][0] - install_xformers = platform.machine() != "aarch64" - if platform.machine() == "aarch64": - # skip on ARM64 - skip_packages = [ - "torchao", - "fla-core", - "flash-linear-attention", - ] - _install_requires = [ - req - for req in _install_requires - if re.split(r"[>=<]", req)[0].strip() not in skip_packages - ] - if "Darwin" in platform.system(): - # skip packages not compatible with OSX - skip_packages = [ - "bitsandbytes", - "triton", - "mamba-ssm", - "xformers", - "liger-kernel", - ] - _install_requires = [ - req - for req in _install_requires - if re.split(r"[>=<]", req)[0].strip() not in skip_packages - ] - print( - _install_requires, [req in skip_packages for req in _install_requires] - ) - else: - # detect the version of torch already installed - # and set it so dependencies don't clobber the torch version - try: - torch_version = version("torch") - except PackageNotFoundError: - torch_version = "2.8.0" # default to torch 2.8.0 - _install_requires.append(f"torch=={torch_version}") - - version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version) - if version_match: - major, minor, patch = version_match.groups() - 
major, minor = int(major), int(minor) - patch = ( - int(patch) if patch is not None else 0 - ) # Default patch to 0 if not present - else: - raise ValueError("Invalid version format") - - torch_parts = torch_version.split("+") - if len(torch_parts) == 2: - torch_cuda_version = torch_parts[1] - _dependency_links.append( - f"https://download.pytorch.org/whl/{torch_cuda_version}" - ) - - if (major, minor) >= (2, 10): - extras_require_map.pop("fbgemm-gpu") - extras_require_map["fbgemm-gpu"] = [ - "fbgemm-gpu==1.5.0", - "fbgemm-gpu-genai==1.5.0", - ] - if not install_xformers: - _install_requires.pop(_install_requires.index(xformers_version)) - extras_require_map["vllm"] = ["vllm>=0.19.0"] - elif (major, minor) >= (2, 9): - extras_require_map.pop("fbgemm-gpu") - extras_require_map["fbgemm-gpu"] = [ - "fbgemm-gpu==1.4.0", - "fbgemm-gpu-genai==1.4.2", - ] - if not install_xformers: - _install_requires.pop(_install_requires.index(xformers_version)) - if patch == 0: - extras_require_map["vllm"] = ["vllm==0.13.0"] - else: - extras_require_map["vllm"] = ["vllm==0.14.0"] - elif (major, minor) >= (2, 8): - extras_require_map.pop("fbgemm-gpu") - extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.3.0"] - extras_require_map["vllm"] = ["vllm==0.11.0"] - if not install_xformers: - _install_requires.pop(_install_requires.index(xformers_version)) - elif (major, minor) >= (2, 7): - _install_requires.pop(_install_requires.index(xformers_version)) - if patch == 0: - if install_xformers: - _install_requires.append("xformers==0.0.30") - # vllm 0.9.x is incompatible with latest transformers - extras_require_map.pop("vllm") - else: - if install_xformers: - _install_requires.append("xformers==0.0.31") - extras_require_map["vllm"] = ["vllm==0.10.1"] - elif (major, minor) >= (2, 6): - _install_requires.pop(_install_requires.index(xformers_version)) - if install_xformers: - _install_requires.append("xformers==0.0.29.post3") - # since we only support 2.6.0+cu126 - _dependency_links.append("https://download.pytorch.org/whl/cu126") - extras_require_map.pop("vllm") - elif (major, minor) >= (2, 5): - _install_requires.pop(_install_requires.index(xformers_version)) - if install_xformers: - if patch == 0: - _install_requires.append("xformers==0.0.28.post2") - else: - _install_requires.append("xformers>=0.0.28.post3") - extras_require_map.pop("vllm") - elif (major, minor) >= (2, 4): - extras_require_map.pop("vllm") - if install_xformers: - if patch == 0: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.27") - else: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers==0.0.28.post1") - else: - raise ValueError("axolotl requires torch>=2.4") - - except PackageNotFoundError: - pass - return _install_requires, _dependency_links, extras_require_map - - -def get_package_version(): - with open( - Path(os.path.dirname(os.path.abspath(__file__))) / "VERSION", - "r", - encoding="utf-8", - ) as fin: - version_ = fin.read().strip() - return version_ - - -extras_require = { - "flash-attn": ["flash-attn==2.8.3"], - "ring-flash-attn": [ - "flash-attn==2.8.3", - "ring-flash-attn>=0.1.7", - ], - "deepspeed": [ - "deepspeed==0.18.2", - "deepspeed-kernels", - ], - "mamba-ssm": [ - "mamba-ssm==1.2.0.post1", - "causal_conv1d", - ], - "auto-gptq": [ - "auto-gptq==0.5.1", - ], - "mlflow": [ - "mlflow", - ], - "galore": [ - "galore_torch", - ], - "apollo": [ - "apollo-torch", - ], - "optimizers": [ - "galore_torch", - "apollo-torch", - 
"lomo-optim==0.1.1", - "torch-optimi==0.2.1", - "came_pytorch==0.1.3", - ], - "ray": [ - "ray[train]>=2.52.1", - ], - "vllm": [ - "vllm==0.10.0", - ], - "llmcompressor": [ - "llmcompressor==0.5.1", - ], - "fbgemm-gpu": ["fbgemm-gpu-genai==1.3.0"], - "opentelemetry": [ - "opentelemetry-api", - "opentelemetry-sdk", - "opentelemetry-exporter-prometheus", - "prometheus-client", - ], -} -install_requires, dependency_links, extras_require_build = parse_requirements( - extras_require -) - -setup( - version=get_package_version(), - package_dir={"": "src"}, - packages=find_packages("src"), - install_requires=install_requires, - dependency_links=dependency_links, - entry_points={ - "console_scripts": [ - "axolotl=axolotl.cli.main:main", - ], - }, - extras_require=extras_require_build, -) diff --git a/src/axolotl/cli/utils/lora_merge.py b/src/axolotl/cli/utils/lora_merge.py index a07395587..81cc2cbea 100644 --- a/src/axolotl/cli/utils/lora_merge.py +++ b/src/axolotl/cli/utils/lora_merge.py @@ -339,7 +339,11 @@ def _build_peft_layer_and_get_delta( ) layer.lora_A[adapter_name].weight.data = lora_a layer.lora_B[adapter_name].weight.data = lora_b - return layer.get_delta_weight(adapter_name) + delta = layer.get_delta_weight(adapter_name) + # peft >=0.19.1 may return delta with transposed dims for 3D params + if delta.shape != base_tensor.shape and delta.ndim == 3: + delta = delta.transpose(1, 2).contiguous() + return delta elif ( layer_type and "Conv" in layer_type or (layer_type is None and lora_a.ndim > 2) ): diff --git a/src/axolotl/integrations/kernels/libs/scattermoe_lora/layers.py b/src/axolotl/integrations/kernels/libs/scattermoe_lora/layers.py index c6c01e255..8fd10c8e9 100644 --- a/src/axolotl/integrations/kernels/libs/scattermoe_lora/layers.py +++ b/src/axolotl/integrations/kernels/libs/scattermoe_lora/layers.py @@ -60,49 +60,14 @@ def peft_lora_B_to_scattermoe(peft_B, num_experts, rank): def peft_lora_to_scattermoe(peft_A, peft_B, num_experts, rank): - """Convert peft LoRA weights to scattermoe layout (with A<->B swap). + """Convert peft LoRA weights to scattermoe layout. - peft operates on the parameter in its native storage layout ``[E, dim1, dim2]`` - where ``in_features=dim1, out_features=dim2``. ScatterMoE transposes the - parameter (``W = param.transpose(2, 1)``) giving ``[E, dim2, dim1]`` with - ``K=dim2, N=dim1``. Because of this transposition, peft's A and B roles - are swapped relative to scattermoe's convention. - - peft gives: - lora_A ``[r*E, dim1]``, lora_B ``[dim2, r*E]`` - - scattermoe needs: - lora_A ``[r*E, K=dim2]``, lora_B ``[N=dim1, r*E]`` - - This function swaps A<->B and converts B from rank-major to expert-major. - Uses vectorized tensor operations (no Python loop over experts). - - Works for **both** gate_up_proj and down_proj since the transposition - issue is the same for any parameter. + peft >=0.19.1 assigns in/out features for 3D params such that + A and B already align with scattermoe's convention (no A<->B swap). + Only B needs rank-major → expert-major layout conversion. 
""" - peft_B_em = peft_lora_B_to_scattermoe(peft_B, num_experts, rank) - - dim1 = peft_A.shape[1] # peft in_features -> scattermoe N - dim2 = peft_B_em.shape[0] # peft out_features -> scattermoe K - - # smoe_A: per expert, transpose B_e [dim2, r] -> [r, dim2] - # [dim2, E*r] -> [dim2, E, r] -> [E, r, dim2] -> [E*r, dim2] - smoe_A = ( - peft_B_em.reshape(dim2, num_experts, rank) - .permute(1, 2, 0) - .contiguous() - .reshape(rank * num_experts, dim2) - ) - - # smoe_B: per expert, transpose A_e [r, dim1] -> [dim1, r] - # [E*r, dim1] -> [E, r, dim1] -> [dim1, E, r] -> [dim1, E*r] - smoe_B = ( - peft_A.reshape(num_experts, rank, dim1) - .permute(2, 0, 1) - .contiguous() - .reshape(dim1, num_experts * rank) - ) - + smoe_A = peft_A + smoe_B = peft_lora_B_to_scattermoe(peft_B, num_experts, rank) return smoe_A, smoe_B diff --git a/src/axolotl/loaders/patch_manager.py b/src/axolotl/loaders/patch_manager.py index 41fc35e6e..f32b7c12e 100644 --- a/src/axolotl/loaders/patch_manager.py +++ b/src/axolotl/loaders/patch_manager.py @@ -162,7 +162,6 @@ class PatchManager: def apply_post_model_load_patches(self, model: PreTrainedModel): """Apply patches that require the model instance.""" self._apply_llama_flash_attn_patches(model) - self._apply_unsloth_patches(model) self._apply_lora_kernel_patch(model) self._apply_scaling_softmax_patch(model) @@ -674,24 +673,10 @@ class PatchManager: ) patch_fa_llama_cross_entropy() - elif self.cfg.unsloth_cross_entropy_loss: - from axolotl.monkeypatch.unsloth_ import integrate_cross_entropy_loss_patch - - integrate_cross_entropy_loss_patch(model_type="llama") - if self.cfg.flash_attn_rms_norm and self.has_flash_attn: from axolotl.monkeypatch.llama_attn_hijack_flash import patch_llama_rms_norm patch_llama_rms_norm() - elif self.cfg.unsloth_rms_norm: - from axolotl.monkeypatch.unsloth_ import patch_unsloth_layernorm - - patch_unsloth_layernorm() - - if self.cfg.unsloth_lora_qkv or self.cfg.unsloth_lora_o: - from axolotl.monkeypatch.unsloth_ import patch_self_attn_lora - - patch_self_attn_lora() def _patch_llama_flash_attention(self): """Apply Flash Attention patches for LLaMA models.""" @@ -758,23 +743,6 @@ class PatchManager: LOG.info("Patching with SwiGLU...") replace_llama_mlp_with_swiglu(model) - def _apply_unsloth_patches(self, model): - """Apply unsloth optimization patches.""" - if self.cfg.unsloth_lora_mlp: - from axolotl.monkeypatch.unsloth_ import integrate_lora_mlp_patch - - integrate_lora_mlp_patch(peft_model=model) - - if self.cfg.unsloth_lora_qkv or self.cfg.unsloth_lora_o: - from axolotl.monkeypatch.unsloth_ import integrate_lora_patch - - integrate_lora_patch(peft_model=model, cfg=self.cfg) - - if self.cfg.unsloth_rope: - from axolotl.monkeypatch.unsloth_ import integrate_rope_embeddings - - integrate_rope_embeddings() - def _apply_lora_kernel_patch(self, model): """Apply LoRA kernel patches.""" if ( diff --git a/src/axolotl/monkeypatch/unsloth_.py b/src/axolotl/monkeypatch/unsloth_.py deleted file mode 100644 index 59f32c6f5..000000000 --- a/src/axolotl/monkeypatch/unsloth_.py +++ /dev/null @@ -1,252 +0,0 @@ -"""module for patching with unsloth optimizations""" - -import inspect -import types - -import torch -from peft import PeftModelForCausalLM -from torch import nn -from transformers.models.llama.modeling_llama import LlamaFlashAttention2 - -from axolotl.monkeypatch.utils import detab_code -from axolotl.utils.logging import get_logger - -LOG = get_logger(__name__) - -ORIGINAL_QKV_CODE = """ - query_states = self.q_proj(hidden_states) - key_states = 
self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) -""".lstrip("\n") - -PATCHED_QKV_CODE = """ - query_states, key_states, value_states = self.apply_qkv(self, hidden_states) -""".lstrip("\n") - -ORIGINAL_O_CODE = """ - attn_output = self.o_proj(attn_output) -""".lstrip("\n") - -PATCHED_O_CODE = """ - attn_output = self.apply_o(self, attn_output) -""".lstrip("\n") - - -def original_apply_qkv(self, hidden_states): - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - return query_states, key_states, value_states - - -def original_apply_o(self, hidden_states): - attn_output = self.o_proj(hidden_states) - return attn_output - - -def get_self_attn_code() -> str: - forward = inspect.getsource(LlamaFlashAttention2.forward) - return forward - - -def check_self_attn_is_patchable() -> bool: - qkv = get_self_attn_code() - qkv, _ = detab_code(qkv) - return ORIGINAL_QKV_CODE in qkv and ORIGINAL_O_CODE in qkv - - -def integrate_cross_entropy_loss_patch(model_type: str = "llama") -> None: - from unsloth.kernels.cross_entropy_loss import fast_cross_entropy_loss - - def UnslothForCausalLMLoss( - logits, - labels, - vocab_size: int, - num_items_in_batch: int = None, - ignore_index: int = -100, - **kwargs, - ): - # Upcast to float if we need to compute the loss to avoid potential precision issues - logits = logits.float() - # Shift so that tokens < n predict n - shift_logits = logits[..., :-1, :].contiguous() - shift_labels = labels[..., 1:].contiguous() - - loss = fast_cross_entropy_loss( - logits=shift_logits, labels=shift_labels, n_items=num_items_in_batch - ) - return loss - - if model_type == "llama": - from transformers.loss import loss_utils - - loss_utils.ForCausalLMLoss = UnslothForCausalLMLoss # type: ignore[assignment] - else: - raise ValueError("Unsupported model type") - - -self_attn_lora_patched = False - - -def patch_self_attn_lora(): - global self_attn_lora_patched - if self_attn_lora_patched: - # prevent patching multiple times - return - self_attn_forward = get_self_attn_code() - LlamaFlashAttention2._original_forward = self_attn_forward - self_attn_forward, _ = detab_code(self_attn_forward) - assert ORIGINAL_QKV_CODE in self_attn_forward, "Original qkv code not found" - assert ORIGINAL_O_CODE in self_attn_forward, "Original o code not found" - - self_attn_forward = self_attn_forward.replace(ORIGINAL_QKV_CODE, PATCHED_QKV_CODE) - self_attn_forward = self_attn_forward.replace(ORIGINAL_O_CODE, PATCHED_O_CODE) - self_attn_forward = self_attn_forward.replace( - "def forward(", - "def unsloth_attn_forward(", - 1, - ) - - # load imports necessary - import transformers.models.llama.modeling_llama - - items_to_import = [] - for item in dir(transformers.models.llama.modeling_llama): - if item in self_attn_forward: - items_to_import.append(item) - - exec( - "from transformers.models.llama.modeling_llama import (" - + ", ".join(x for x in items_to_import) - + ")", - globals(), - ) - exec(self_attn_forward, globals()) - self_attn_lora_patched = True - LOG.info("patching unsloth attn lora") - LlamaFlashAttention2.forward = unsloth_attn_forward - - -def integrate_rope_embeddings(): - import transformers.models.llama.modeling_llama - from unsloth.kernels.rope_embedding import fast_rope_embedding - - def apply_rotary_pos_emb( - q, - k, - cos, - sin, - position_ids=None, - unsqueeze_dim=1, - ): - return fast_rope_embedding(q, k, cos, sin) - - LOG.info("patching unsloth RoPE embeddings") - 
transformers.models.llama.modeling_llama.apply_rotary_pos_emb = apply_rotary_pos_emb - - -def integrate_lora_mlp_patch(peft_model: PeftModelForCausalLM): - if peft_model.base_model.config.model_type in ["llama", "mistral"]: - from unsloth.kernels import apply_lora_mlp_swiglu - - apply_lora_mlp = apply_lora_mlp_swiglu - elif peft_model.base_model.config.model_type == "gemma": - from unsloth.kernels import apply_lora_mlp_geglu_approx - - apply_lora_mlp = apply_lora_mlp_geglu_approx - else: - raise NotImplementedError( - f"Model type {peft_model.base_model.config.model_type} not supported" - ) - - for idx, layer in enumerate(peft_model.model.model.layers): - layer_modules = [ - getattr(layer.mlp, linear_proj) - for linear_proj in ["gate_proj", "up_proj", "down_proj"] - ] - is_mlp_lora = all(hasattr(module, "lora_A") for module in layer_modules) - mlp_no_bias = all( - getattr(module, "base_layer", module).bias is None - for module in layer_modules - ) - mlp_not_dora = all( - len(getattr(module, "lora_magnitude_vector", []) or []) == 0 - for module in layer_modules - ) - - if is_mlp_lora and mlp_no_bias and mlp_not_dora: - layer.mlp.forward = types.MethodType(apply_lora_mlp, layer.mlp) - else: - LOG.warning(f"unable to apply unsloth lora mlp patch to layer {idx}") - - -def integrate_lora_patch(peft_model: PeftModelForCausalLM, cfg): - from unsloth.kernels import apply_lora_o, apply_lora_qkv - - for idx, layer in enumerate(peft_model.model.model.layers): - if cfg.unsloth_lora_qkv: - layer_modules = [ - getattr(layer.self_attn, linear_proj) - for linear_proj in ["q_proj", "k_proj", "v_proj"] - ] - is_qkv_lora = all(hasattr(module, "lora_A") for module in layer_modules) - qkv_no_bias = all( - getattr(module, "base_layer", module).bias is None - for module in layer_modules - ) - qkv_not_dora = all( - len(getattr(module, "lora_magnitude_vector", []) or []) == 0 - for module in layer_modules - ) - - if is_qkv_lora and qkv_no_bias and qkv_not_dora: - layer.self_attn.apply_qkv = apply_lora_qkv - else: - layer.self_attn.apply_qkv = original_apply_qkv - LOG.warning(f"unable to apply unsloth lora qkv patch to layer {idx}") - if cfg.unsloth_lora_o: - layer_modules = [ - getattr(layer.self_attn, linear_proj) for linear_proj in ["o_proj"] - ] - is_o_lora = all(hasattr(module, "lora_A") for module in layer_modules) - o_no_bias = all( - getattr(module, "base_layer", module).bias is None - for module in layer_modules - ) - o_not_dora = all( - len(getattr(module, "lora_magnitude_vector", []) or []) == 0 - for module in layer_modules - ) - - if is_o_lora and o_no_bias and o_not_dora: - layer.self_attn.apply_o = apply_lora_o - else: - layer.self_attn.apply_o = original_apply_o - LOG.warning(f"unable to apply unsloth lora o_proj patch to layer {idx}") - - -def patch_unsloth_layernorm(): - try: - import transformers.models.llama.modeling_llama - from unsloth.kernels.rms_layernorm import Fast_RMS_Layernorm - - class LlamaRMSNorm(nn.Module): - """LlamaRMSNorm""" - - def __init__(self, hidden_size, eps=1e-6): - """ - LlamaRMSNorm is equivalent to T5LayerNorm - """ - super().__init__() - self.weight = nn.Parameter(torch.ones(hidden_size)) - self.variance_epsilon = eps - - def forward(self, hidden_states): - return Fast_RMS_Layernorm.apply( - hidden_states, self.weight, self.variance_epsilon, False - ) - - LOG.info("patching with unsloth.kernels.rms_layernorm") - transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm - except ImportError: - LOG.warning("missing unsloth library") diff --git 
a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py index 496657030..c762d7f80 100644 --- a/src/axolotl/utils/schemas/config.py +++ b/src/axolotl/utils/schemas/config.py @@ -823,13 +823,6 @@ class AxolotlInputConfig( }, ) - unsloth_cross_entropy_loss: bool | None = None - unsloth_lora_mlp: bool | None = None - unsloth_lora_qkv: bool | None = None - unsloth_lora_o: bool | None = None - unsloth_rms_norm: bool | None = None - unsloth_rope: bool | None = None - lora_mlp_kernel: bool | None = Field( default=None, json_schema_extra={ @@ -1469,21 +1462,6 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig): ) return data - @model_validator(mode="before") - @classmethod - def check_multigpu_unsloth(cls, data): - if ( - data.get("unsloth_lora_mlp") - or data.get("unsloth_lora_qkv") - or data.get("unsloth_lora_o") - ): - capabilities = data.get("capabilities") - if capabilities and capabilities.get("n_gpu", 0) > 1: - raise ValueError( - "unsloth_lora_mlp, unsloth_lora_qkv, and unsloth_lora_o are not compatible with multi-GPU training." - ) - return data - @model_validator(mode="before") @classmethod def check_multigpu_lora_kernels(cls, data): @@ -1537,8 +1515,7 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig): # RL trainers not tested so don't enable kernels by default return data if data.get("adapter") in ["lora", "qlora"]: - # Skip if already set, using unsloth optimizations, or using 8-bit - unsloth_fields = ["unsloth_lora_mlp", "unsloth_lora_qkv", "unsloth_lora_o"] + # Skip if already set or using 8-bit kernel_fields = [ "lora_mlp_kernel", "lora_qkv_kernel", @@ -1547,7 +1524,6 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig): ] if ( any(data.get(k) is not None for k in kernel_fields) - or any(data.get(k) for k in unsloth_fields) or data.get("adapter") == "lora" and data.get("load_in_8bit") ): diff --git a/src/axolotl/utils/schemas/validation.py b/src/axolotl/utils/schemas/validation.py index ff7813600..1780a9cc8 100644 --- a/src/axolotl/utils/schemas/validation.py +++ b/src/axolotl/utils/schemas/validation.py @@ -52,6 +52,26 @@ class DatasetValidationMixin: return datasets + @model_validator(mode="before") + @classmethod + def check_deprecated_unsloth_fields(cls, data): + deprecated_fields = [ + "unsloth_cross_entropy_loss", + "unsloth_lora_mlp", + "unsloth_lora_qkv", + "unsloth_lora_o", + "unsloth_rms_norm", + "unsloth_rope", + ] + found = [f for f in deprecated_fields if data.get(f)] + if found: + raise ValueError( + f"`{'`, `'.join(found)}` {'has' if len(found) == 1 else 'have'} been removed. " + "Please use `lora_mlp_kernel`, `lora_qkv_kernel`, `lora_o_kernel` instead. 
" + "See: https://docs.axolotl.ai/docs/lora_optims.html" + ) + return data + @model_validator(mode="before") @classmethod def check_dataset_or_pretraining_dataset(cls, data): @@ -607,36 +627,6 @@ class LoRAValidationMixin: ) return data - @model_validator(mode="before") - @classmethod - def check_qlora_unsloth(cls, data): - if ( - data.get("unsloth_lora_mlp") - or data.get("unsloth_lora_qkv") - or data.get("unsloth_lora_o") - ): - if data.get("adapter") == "lora" and data.get("load_in_8bit"): - raise ValueError( - "unsloth_lora_mlp, unsloth_lora_qkv, and unsloth_lora_o are not compatible with 8-bit LoRA" - ) - return data - - @model_validator(mode="before") - @classmethod - def check_lora_axolotl_unsloth(cls, data): - is_lora_kernel = any( - data.get(k) for k in ["lora_mlp_kernel", "lora_qkv_kernel", "lora_o_kernel"] - ) - is_unsloth_lora = any( - data.get(k) - for k in ["unsloth_lora_mlp", "unsloth_lora_qkv", "unsloth_lora_o"] - ) - if is_lora_kernel and is_unsloth_lora: - raise ValueError( - "both lora_mlp_kernel and unsloth_lora_mlp cannot be true (similarly for lora_qkv_kernel, lora_o_kernel)" - ) - return data - @model_validator(mode="after") def check_fused_lora(self): if self.adapter in ["lora", "qlora"] and self.flash_attn_fuse_mlp: @@ -860,17 +850,6 @@ class OptimizationValidationMixin: return data - @model_validator(mode="before") - @classmethod - def check_xentropy_patch_conflicts(cls, data): - if data.get("flash_attn_cross_entropy") and data.get( - "unsloth_cross_entropy_loss" - ): - raise ValueError( - "flash_attn_cross_entropy and unsloth_cross_entropy_loss cannot be both enabled" - ) - return data - @model_validator(mode="before") @classmethod def check_cross_entropy_conflicts(cls, data): diff --git a/src/setuptools_axolotl_dynamic_dependencies.py b/src/setuptools_axolotl_dynamic_dependencies.py deleted file mode 100644 index 3bb54cda8..000000000 --- a/src/setuptools_axolotl_dynamic_dependencies.py +++ /dev/null @@ -1,102 +0,0 @@ -""" -dynamic requirements for axolotl -""" - -import platform -import re -from importlib.metadata import PackageNotFoundError, version - -from setuptools.command.build_py import build_py as _build_py - - -def parse_requirements(): - _install_requires = [] - _dependency_links = [] - with open("./requirements.txt", encoding="utf-8") as requirements_file: - lines = [r.strip() for r in requirements_file.readlines()] - for line in lines: - is_extras = ( - "flash-attn" in line - or "flash-attention" in line - or "deepspeed" in line - or "mamba-ssm" in line - or "lion-pytorch" in line - ) - if line.startswith("--extra-index-url"): - # Handle custom index URLs - _, url = line.split() - _dependency_links.append(url) - elif not is_extras and line and line[0] != "#": - # Handle standard packages - _install_requires.append(line) - - try: - xformers_version = [req for req in _install_requires if "xformers" in req][0] - torchao_version = [req for req in _install_requires if "torchao" in req][0] - - if "Darwin" in platform.system(): - # don't install xformers on MacOS - _install_requires.pop(_install_requires.index(xformers_version)) - else: - # detect the version of torch already installed - # and set it so dependencies don't clobber the torch version - try: - torch_version = version("torch") - except PackageNotFoundError: - torch_version = "2.5.1" - _install_requires.append(f"torch=={torch_version}") - - version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version) - if version_match: - major, minor, patch = version_match.groups() - major, minor = 
int(major), int(minor) - patch = ( - int(patch) if patch is not None else 0 - ) # Default patch to 0 if not present - else: - raise ValueError("Invalid version format") - - if (major, minor) >= (2, 5): - _install_requires.pop(_install_requires.index(xformers_version)) - if patch == 0: - _install_requires.append("xformers==0.0.28.post2") - else: - _install_requires.append("xformers==0.0.28.post3") - elif (major, minor) >= (2, 4): - if patch == 0: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.27") - else: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers==0.0.28.post1") - elif (major, minor) >= (2, 3): - _install_requires.pop(_install_requires.index(torchao_version)) - if patch == 0: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.26.post1") - else: - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.27") - elif (major, minor) >= (2, 2): - _install_requires.pop(_install_requires.index(torchao_version)) - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.25.post1") - else: - _install_requires.pop(_install_requires.index(torchao_version)) - _install_requires.pop(_install_requires.index(xformers_version)) - _install_requires.append("xformers>=0.0.23.post1") - - except PackageNotFoundError: - pass - return _install_requires, _dependency_links - - -class BuildPyCommand(_build_py): - """ - custom build_py command to parse dynamic requirements - """ - - def finalize_options(self): - super().finalize_options() - install_requires, _ = parse_requirements() - self.distribution.install_requires = install_requires diff --git a/tests/conftest.py b/tests/conftest.py index f857dd363..19e3dc3f0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -325,10 +325,10 @@ def download_phi_4_reasoning_model_fixture(): @pytest.fixture(scope="session", autouse=True) -def download_phi_3_medium_model_fixture(): +def download_phi_3_mini_model_fixture(): # download the tokenizer only snapshot_download_w_retry( - "microsoft/Phi-3-medium-128k-instruct", + "microsoft/Phi-3-mini-4k-instruct", repo_type="model", allow_patterns=["*token*", "config.json"], ) diff --git a/tests/e2e/integrations/test_scattermoe_lora_olmoe.py b/tests/e2e/integrations/test_scattermoe_lora_olmoe.py index 1cd514b54..945a9173a 100644 --- a/tests/e2e/integrations/test_scattermoe_lora_olmoe.py +++ b/tests/e2e/integrations/test_scattermoe_lora_olmoe.py @@ -54,24 +54,8 @@ except (ImportError, ModuleNotFoundError): ) def peft_lora_to_scattermoe(peft_A, peft_B, num_experts, rank): - peft_B_em = peft_lora_B_to_scattermoe(peft_B, num_experts, rank) - K_inter, N_hidden = peft_B.shape[0], peft_A.shape[1] - smoe_A = torch.zeros( - rank * num_experts, - K_inter, - device=peft_A.device, - dtype=peft_A.dtype, - ) - smoe_B = torch.zeros( - N_hidden, - rank * num_experts, - device=peft_A.device, - dtype=peft_A.dtype, - ) - for e in range(num_experts): - s = e * rank - smoe_A[s : s + rank, :] = peft_B_em[:, s : s + rank].T - smoe_B[:, s : s + rank] = peft_A[s : s + rank, :].T + smoe_A = peft_A + smoe_B = peft_lora_B_to_scattermoe(peft_B, num_experts, rank) return smoe_A, smoe_B def _unwrap_experts_lora(experts_module): @@ -322,12 +306,14 @@ class TestLoRABLayoutConversion: hidden, inter = 32, 16 scaling = 2.0 - peft_A = torch.randn(E * r, hidden) - peft_B = torch.randn(inter, E 
* r) + # peft >=0.19.1 for down_proj [E, hidden, inter]: + # swaps in/out, lora_A [r*E, inter], lora_B [hidden, r*E] + peft_A = torch.randn(E * r, inter) + peft_B = torch.randn(hidden, E * r) - A_r = peft_A.reshape(E, r, hidden) - B_r = peft_B.reshape(inter, r, E) - delta_peft = torch.einsum("o r e, e r i -> e i o", B_r, A_r) * scaling + A_r = peft_A.reshape(E, r, inter) + B_r = peft_B.reshape(hidden, r, E) + delta_peft = torch.einsum("o r e, e r i -> e o i", B_r, A_r) * scaling smoe_A, smoe_B = peft_lora_to_scattermoe(peft_A, peft_B, E, r) for e in range(E): @@ -339,31 +325,22 @@ class TestLoRABLayoutConversion: ) def test_gate_up_proj_conversion(self): - """Verify gate_up_proj LoRA conversion with non-square dims (Qwen3-like). + """Verify gate_up_proj LoRA conversion with non-square dims. gate_up_proj param: [E, 2*inter, hidden]. - peft: in_features=2*inter, out_features=hidden. - peft lora_A: [r*E, 2*inter], lora_B: [hidden, r*E]. - - scattermoe W = param.T = [E, hidden, 2*inter], K=hidden, N=2*inter. + peft swaps in/out for 3D: lora_A [r*E, hidden], lora_B [2*inter, r*E]. scattermoe needs: lora_A [r*E, K=hidden], lora_B [N=2*inter, r*E]. - - Uses non-square dims (hidden=32 != 2*inter=24) to catch A<->B swap bugs. """ E, r = 4, 2 hidden, inter = 32, 12 # 2*inter=24 != hidden=32 scaling = 2.0 - # peft assigns: in_features=2*inter, out_features=hidden - peft_A = torch.randn(E * r, 2 * inter) # [r*E, in_features=2*inter] - peft_B = torch.randn(hidden, E * r) # [out_features=hidden, r*E] + peft_A = torch.randn(E * r, hidden) # [r*E, in=hidden] + peft_B = torch.randn(2 * inter, E * r) # [out=2*inter, r*E] - # peft delta via einsum: "o r e, e r i -> e i o" - A_r = peft_A.reshape(E, r, 2 * inter) - B_r = peft_B.reshape(hidden, r, E) - delta_peft = torch.einsum("o r e, e r i -> e i o", B_r, A_r) * scaling - # delta_peft[e] has shape [in_features, out_features] = [2*inter, hidden] - # = param[e] shape [2*inter, hidden] + A_r = peft_A.reshape(E, r, hidden) + B_r = peft_B.reshape(2 * inter, r, E) + delta_peft = torch.einsum("o r e, e r i -> e o i", B_r, A_r) * scaling smoe_A, smoe_B = peft_gate_up_lora_to_scattermoe(peft_A, peft_B, E, r) # smoe_A should be [r*E, K=hidden], smoe_B should be [N=2*inter, r*E] @@ -421,23 +398,21 @@ class TestPeftLoRAWeightExtraction: r, ) - # gate_up_proj [E, 2*inter, hidden] - # peft: in_features=2*inter (dim 1), out_features=hidden (dim 2) + # gate_up_proj [E, 2*inter, hidden] — peft swaps in/out for 3D assert trainable[ "base_model.model.moe.experts.base_layer.lora_A.default.weight" - ].shape == (E * r, 2 * config.intermediate_size) - assert trainable[ - "base_model.model.moe.experts.base_layer.lora_B.default.weight" - ].shape == (config.hidden_size, E * r) - - # down_proj [E, hidden, inter] - # peft: in_features=hidden (dim 1), out_features=inter (dim 2) - assert trainable[ - "base_model.model.moe.experts.lora_A.default.weight" ].shape == (E * r, config.hidden_size) + assert trainable[ + "base_model.model.moe.experts.base_layer.lora_B.default.weight" + ].shape == (2 * config.intermediate_size, E * r) + + # down_proj [E, hidden, inter] — peft swaps in/out for 3D + assert trainable[ + "base_model.model.moe.experts.lora_A.default.weight" + ].shape == (E * r, config.intermediate_size) assert trainable[ "base_model.model.moe.experts.lora_B.default.weight" - ].shape == (config.intermediate_size, E * r) + ].shape == (config.hidden_size, E * r) @requires_cuda def test_peft_forward_runs(self): @@ -488,8 +463,7 @@ class TestPeftLoRAWeightExtraction: assert gup_lora is not 
None, "gate_up_proj LoRA not detected" assert down_lora is not None, "down_proj LoRA not detected" - # Check shapes (after peft->scattermoe conversion with A<->B swap) - # gate_up_proj W = param.T = [E, hidden, 2*inter], K=hidden, N=2*inter + # gate_up_proj: K=hidden, N=2*inter E, r = config.num_experts, 4 gup_A, gup_B, gup_s = gup_lora assert gup_A.shape == (E * r, config.hidden_size), ( @@ -501,7 +475,7 @@ class TestPeftLoRAWeightExtraction: f"{(2 * config.intermediate_size, E * r)}, got {gup_B.shape}" ) - # down_proj W = param.T = [E, inter, hidden], K=inter, N=hidden + # down_proj: K=inter, N=hidden down_A, down_B, down_s = down_lora assert down_A.shape == (E * r, config.intermediate_size), ( f"down_proj smoe_A: expected [r*E, K=inter]={(E * r, config.intermediate_size)}, " diff --git a/tests/e2e/patched/test_unsloth_integration.py b/tests/e2e/patched/test_unsloth_integration.py deleted file mode 100644 index 4cd97c894..000000000 --- a/tests/e2e/patched/test_unsloth_integration.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Test module for checking whether the integration of Unsloth with Hugging Face Transformers is working as expected.""" - -import unittest - -import pytest - - -@pytest.mark.skip( - reason="Unsloth integration will be broken going into latest transformers" -) -class TestUnslothIntegration(unittest.TestCase): - """Unsloth monkeypatch integration tests.""" - - def test_is_self_attn_patchable(self): - from axolotl.monkeypatch.unsloth_ import check_self_attn_is_patchable - - # ensures the current version of transformers has loss code that matches our patching code - self.assertTrue( - check_self_attn_is_patchable(), - "HF transformers self attention code has changed and isn't patchable", - ) diff --git a/tests/e2e/patched/test_unsloth_qlora.py b/tests/e2e/patched/test_unsloth_qlora.py deleted file mode 100644 index bf00e8a5f..000000000 --- a/tests/e2e/patched/test_unsloth_qlora.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -e2e tests for unsloth qlora -""" - -import pytest - -from axolotl.common.datasets import load_datasets -from axolotl.train import train -from axolotl.utils.config import normalize_config, validate_config -from axolotl.utils.dict import DictDefault - -from ..utils import check_model_output_exists, check_tensorboard - - -@pytest.mark.skip( - reason="Unsloth integration will be broken going into latest transformers" -) -class TestUnslothQLoRA: - """ - Test class for Unsloth QLoRA Llama models - """ - - @pytest.mark.parametrize( - "sample_packing", - [True, False], - ) - def test_unsloth_llama_qlora_fa2(self, temp_dir, sample_packing): - cfg = DictDefault( - { - "base_model": "HuggingFaceTB/SmolLM2-135M", - "sequence_len": 1024, - "sample_packing": sample_packing, - "flash_attention": True, - "unsloth_lora_mlp": True, - "unsloth_lora_qkv": True, - "unsloth_lora_o": True, - "load_in_4bit": True, - "adapter": "qlora", - "lora_r": 16, - "lora_alpha": 16, - "lora_dropout": 0.05, - "lora_target_linear": True, - "val_set_size": 0.05, - "special_tokens": { - "pad_token": "<|endoftext|>", - }, - "datasets": [ - { - "path": "mhenrichsen/alpaca_2k_test", - "type": "alpaca", - }, - ], - "num_epochs": 1, - "max_steps": 5, - "save_steps": 10, - "micro_batch_size": 4, - "gradient_accumulation_steps": 2, - "output_dir": temp_dir, - "learning_rate": 0.00001, - "optimizer": "adamw_8bit", - "lr_scheduler": "cosine", - "use_tensorboard": True, - "bf16": "auto", - "save_first_step": False, - } - ) - - cfg = validate_config(cfg) - normalize_config(cfg) - dataset_meta = load_datasets(cfg=cfg) 
- - train(cfg=cfg, dataset_meta=dataset_meta) - check_model_output_exists(temp_dir, cfg) - - check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high" - ) - - def test_unsloth_llama_qlora_unpacked(self, temp_dir): - cfg = DictDefault( - { - "base_model": "HuggingFaceTB/SmolLM2-135M", - "sequence_len": 1024, - "unsloth_lora_mlp": True, - "unsloth_lora_qkv": True, - "unsloth_lora_o": True, - "sample_packing": False, - "load_in_4bit": True, - "adapter": "qlora", - "lora_r": 16, - "lora_alpha": 16, - "lora_dropout": 0.05, - "lora_target_linear": True, - "val_set_size": 0.05, - "special_tokens": { - "pad_token": "<|endoftext|>", - }, - "datasets": [ - { - "path": "mhenrichsen/alpaca_2k_test", - "type": "alpaca", - }, - ], - "num_epochs": 1, - "max_steps": 5, - "save_steps": 10, - "micro_batch_size": 4, - "gradient_accumulation_steps": 2, - "output_dir": temp_dir, - "learning_rate": 0.00001, - "optimizer": "adamw_8bit", - "lr_scheduler": "cosine", - "use_tensorboard": True, - "bf16": "auto", - "save_first_step": False, - } - ) - - cfg = validate_config(cfg) - normalize_config(cfg) - dataset_meta = load_datasets(cfg=cfg) - - train(cfg=cfg, dataset_meta=dataset_meta) - check_model_output_exists(temp_dir, cfg) - - check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high" - ) - - @pytest.mark.parametrize( - "sdp_attention", - [True, False], - ) - def test_unsloth_llama_qlora_unpacked_no_fa2_fp16(self, temp_dir, sdp_attention): - cfg = DictDefault( - { - "base_model": "HuggingFaceTB/SmolLM2-135M", - "sequence_len": 1024, - "unsloth_lora_mlp": True, - "unsloth_lora_qkv": True, - "unsloth_lora_o": True, - "sample_packing": False, - "load_in_4bit": True, - "adapter": "qlora", - "lora_r": 16, - "lora_alpha": 16, - "lora_dropout": 0.05, - "lora_target_linear": True, - "val_set_size": 0.05, - "special_tokens": { - "pad_token": "<|endoftext|>", - }, - "datasets": [ - { - "path": "mhenrichsen/alpaca_2k_test", - "type": "alpaca", - }, - ], - "num_epochs": 1, - "max_steps": 5, - "save_steps": 10, - "micro_batch_size": 4, - "gradient_accumulation_steps": 2, - "sdp_attention": sdp_attention, - "output_dir": temp_dir, - "learning_rate": 0.00001, - "optimizer": "adamw_8bit", - "lr_scheduler": "cosine", - "use_tensorboard": True, - "fp16": True, - "save_first_step": False, - } - ) - - cfg = validate_config(cfg) - normalize_config(cfg) - dataset_meta = load_datasets(cfg=cfg) - - train(cfg=cfg, dataset_meta=dataset_meta) - check_model_output_exists(temp_dir, cfg) - - check_tensorboard( - temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high" - ) diff --git a/tests/prompt_strategies/test_dpo_chat_templates.py b/tests/prompt_strategies/test_dpo_chat_templates.py index 74c98204c..28028ce42 100644 --- a/tests/prompt_strategies/test_dpo_chat_templates.py +++ b/tests/prompt_strategies/test_dpo_chat_templates.py @@ -111,7 +111,7 @@ def fixture_argilla_chat_dataset(): @pytest.fixture(name="phi3_tokenizer") @enable_hf_offline def fixture_phi3_tokenizer(): - tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-medium-128k-instruct") + tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct") return tokenizer @@ -214,8 +214,8 @@ class TestAssistantDPOChatTemplatePhi3: + "<|user|>\ngoodbye<|end|>\n" + "<|assistant|>\n" ) - assert result["chosen"] == "goodbye<|end|>" - assert result["rejected"] == "party on<|end|>" + assert result["chosen"] == "goodbye<|end|>\n<|endoftext|>" + assert result["rejected"] == 
"party on<|end|>\n<|endoftext|>" class TestAssistantDPOChatTemplateGemma: @@ -290,8 +290,8 @@ class TestArgillaChatDPOChatTemplate: ) result = transform_fn(argilla_chat_dataset[0], tokenizer=phi3_tokenizer) assert result["prompt"] == "<|user|>\nhello<|end|>\n" + "<|assistant|>\n" - assert result["chosen"] == "goodbye<|end|>" - assert result["rejected"] == "party on<|end|>" + assert result["chosen"] == "goodbye<|end|>\n<|endoftext|>" + assert result["rejected"] == "party on<|end|>\n<|endoftext|>" class TestDPOChatTemplateToolRole: