Compare commits: uv-first...fix/diffus (63 commits)
Commit SHA1s (only the SHA1 column of the commit table is recoverable):

08c8f3f22f, 76f0fe2621, c6ddcdd06a, 7fb6a947d9, b234532d9f, 8990ca3205, 006f226270,
0b635e69c5, 0d27e14e45, f5f21fb216, 4e55871112, a6bafb55cb, 0fbde69e9c, 301e22849f,
dcf24fd24e, 49b8107989, 9901ee5602, dd78f2e0cc, b54f9c942b, 11eb36585a, d0c846fc5e,
b5fcc2f14b, b62eed8809, ed2e8cacd6, 80270a92fa, bfdc9a8249, c37decb073, 01a346d86a,
26f05b6008, ed58fa8a75, 633afffacb, 4b1b4fa6d8, 0f7c886b7b, a4b921135b, 98333e639a,
9d4d39e939, bb33fda44d, 4dc018992d, 243620394a, 3750fdcf79, 613bcf90e5, 383f220cfd,
8bb871b5cf, 87565ecc05, 93ba57396f, aa1240acd8, 4cdfdfebb5, 6e2f5ccf9f, 8c7f63cf97,
cd856b45b1, 143dea4753, bc2ffb8204, 153edcfe79, 08b8fa62cc, 3a5c97e6e5, 37f78c8592,
ab63b92c38, 6f8ce024d1, d0e9c3c1c5, 4c3488cc9f, 130637a3fa, 377c510e95, 409cfb8a87

@@ -2,6 +2,7 @@
 source = axolotl
 omit =
     */tests/*
+    setup.py

 [report]
 exclude_lines =

.github/CONTRIBUTING.md (15 changed lines)

@@ -29,18 +29,13 @@ PRs are **greatly welcome**!
 2. Set up the development environment by following the instructions in the [README.md](https://github.com/axolotl-ai-cloud/axolotl/tree/main/README.md) file.
 3. Explore the codebase, run tests, and verify that everything works as expected.

-Please run the below to setup:
+Please run the commands below to set up the environment:

 ```bash
-git clone https://github.com/axolotl-ai-cloud/axolotl.git
-cd axolotl
-
-uv sync --dev && uv pip install flash-attn --no-build-isolation
-source .venv/bin/activate
-
-pre-commit install # install pre-commit hooks
-
-pytest tests/ # optional; run test suite
+pip3 install -r requirements-dev.txt -r requirements-tests.txt
+pre-commit install
+# test
+pytest tests/
 ```

 ## How to Contribute
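Note that the new instructions assume an existing checkout. A complete end-to-end setup along those lines (a sketch combining the clone steps from the removed block with the new pip-based install) might look like:

```bash
# Clone and enter the repository (carried over from the removed instructions)
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

# Install dev and test dependencies, then register the pre-commit hooks
pip3 install -r requirements-dev.txt -r requirements-tests.txt
pre-commit install

# Optionally run the test suite to verify the environment
pytest tests/
```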

.github/FUNDING.yml (6 changed lines)

@@ -1,13 +1,13 @@
 # These are supported funding model platforms

-github: [winglian, OpenAccess-AI-Collective] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 patreon: # Replace with a single Patreon username
 open_collective: # Replace with a single Open Collective username
-ko_fi: axolotl_ai # Replace with a single Ko-fi username
+ko_fi: # Replace with a single Ko-fi username
 tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 liberapay: # Replace with a single Liberapay username
 issuehunt: # Replace with a single IssueHunt username
 otechie: # Replace with a single Otechie username
 lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-custom: ['https://quickchart.io/qr?text=bitcoin%3Abc1qxlgwlqwfea5s2cxm42xqsfmwjct0rj8w8ea5np&size=480&centerImageUrl=https%3A%2F%2Fupload.wikimedia.org%2Fwikipedia%2Fcommons%2Fthumb%2F4%2F46%2FBitcoin.svg%2F64px-Bitcoin.svg.png'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

.github/workflows/base.yml (49 changed lines)

@@ -25,18 +25,11 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "124"
-            cuda_version: 12.4.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.6.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
           - cuda: "126"
             cuda_version: 12.6.3
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.6.0
+            pytorch: 2.7.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
           - cuda: "126"
@@ -60,6 +53,20 @@ jobs:
             pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
+          - cuda: "128"
+            cuda_version: 12.8.1
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.1
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-base"
+          - cuda: "130"
+            cuda_version: 13.0.0
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.1
+            torch_cuda_arch_list: "9.0+PTX"
+            dockerfile: "Dockerfile-base"
           # - cuda: "128"
           #   cuda_version: 12.8.1
           #   cudnn_version: ""
@@ -83,7 +90,6 @@ jobs:
         uses: docker/metadata-action@v5
         with:
           images: |
-            winglian/axolotl-base
             axolotlai/axolotl-base
       - name: Login to Docker Hub
         uses: docker/login-action@v2
@@ -98,9 +104,7 @@ jobs:
           context: .
           file: ./docker/${{ matrix.dockerfile }}
           push: ${{ github.event_name != 'pull_request' }}
-          tags: |
-            ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
-            ${{ steps.metadata.outputs.tags }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+          tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
           labels: ${{ steps.metadata.outputs.labels }}
           build-args: |
             CUDA_VERSION=${{ matrix.cuda_version }}
@@ -117,13 +121,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.6.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
           - cuda: "126"
             cuda_version: 12.6.3
             cudnn_version: ""
@@ -145,6 +142,20 @@ jobs:
             pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
+          - cuda: "128"
+            cuda_version: 12.8.1
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.1
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-uv-base"
+          - cuda: "130"
+            cuda_version: 13.0.0
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.1
+            torch_cuda_arch_list: "9.0+PTX"
+            dockerfile: "Dockerfile-uv-base"
     steps:
       - name: Checkout
         uses: actions/checkout@v4

.github/workflows/docs.yml (8 changed lines)

@@ -20,14 +20,10 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install dependencies
        run: |
-          uv pip install --system jupyter quartodoc
-          uv pip install --system -e .
+          python3 -m pip install jupyter quartodoc
+          python3 -m pip install -e .
      - name: Build autodoc
        run: quartodoc build
      - name: Publish to GitHub Pages (and render)
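The docs workflow drops the uv setup step and swaps `uv pip install --system` for `python3 -m pip install`. On a CI runner these are roughly equivalent ways to install into the system interpreter; a minimal sketch of the correspondence (assuming uv is on PATH):

```bash
# uv-first branch: uv installs into the system Python environment
uv pip install --system jupyter quartodoc

# head branch: the same install via the stock pip module
python3 -m pip install jupyter quartodoc
```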

.github/workflows/lint.yml (3 changed lines)

@@ -6,7 +6,7 @@ on:
     types: [opened, synchronize, reopened, ready_for_review]
     paths:
       - '**.py'
-      - 'pyproject.toml'
+      - 'requirements.txt'
       - '.github/workflows/*.yml'
       - "*.[q]md"
       - "examples/**/*.y[a]?ml"
@@ -23,4 +23,5 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+         cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1

.github/workflows/main.yml (42 changed lines)

@@ -18,14 +18,13 @@ jobs:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
-           pytorch: 2.6.0
+           pytorch: 2.7.0
            axolotl_extras:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
            pytorch: 2.7.1
            axolotl_extras: vllm
-           is_latest: true
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -36,6 +35,17 @@ jobs:
            python_version: "3.11"
            pytorch: 2.8.0
            axolotl_extras:
+           is_latest: true
+         - cuda: 128
+           cuda_version: 12.8.1
+           python_version: "3.11"
+           pytorch: 2.9.0
+           axolotl_extras:
+         - cuda: 128
+           cuda_version: 12.8.1
+           python_version: "3.11"
+           pytorch: 2.9.1
+           axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -45,7 +55,6 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-           winglian/axolotl
            axolotlai/axolotl
          tags: |
            type=ref,event=branch
@@ -68,8 +77,6 @@ jobs:
            PYTORCH_VERSION=${{ matrix.pytorch }}
            AXOLOTL_ARGS=${{ matrix.axolotl_args }}
            AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}
-           GIT_REF=${{ github.ref }}
-           GIT_SHA=${{ github.sha }}
          file: ./docker/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
@@ -88,7 +95,7 @@ jobs:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
-           pytorch: 2.6.0
+           pytorch: 2.7.0
            axolotl_extras:
          - cuda: 126
            cuda_version: 12.6.3
@@ -101,7 +108,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.7.1
            axolotl_extras: vllm
-           is_latest: true
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -112,6 +118,17 @@ jobs:
            python_version: "3.11"
            pytorch: 2.8.0
            axolotl_extras:
+           is_latest: true
+         - cuda: 128
+           cuda_version: 12.8.1
+           python_version: "3.11"
+           pytorch: 2.9.0
+           axolotl_extras:
+         - cuda: 128
+           cuda_version: 12.8.1
+           python_version: "3.11"
+           pytorch: 2.9.1
+           axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -121,7 +138,6 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-           winglian/axolotl-cloud
            axolotlai/axolotl-cloud
          tags: |
            type=ref,event=branch
@@ -140,8 +156,6 @@ jobs:
          build-args: |
            BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
            CUDA=${{ matrix.cuda }}
-           GIT_REF=${{ github.ref }}
-           GIT_SHA=${{ github.sha }}
          file: ./docker/Dockerfile-cloud
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
@@ -156,11 +170,6 @@ jobs:
     strategy:
       matrix:
         include:
-         - cuda: 126
-           cuda_version: 12.6.3
-           python_version: "3.11"
-           pytorch: 2.6.0
-           axolotl_extras:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
@@ -188,7 +197,6 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-           winglian/axolotl-cloud-term
            axolotlai/axolotl-cloud-term
          tags: |
            type=ref,event=branch
@@ -207,8 +215,6 @@ jobs:
          build-args: |
            BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
            CUDA=${{ matrix.cuda }}
-           GIT_REF=${{ github.ref }}
-           GIT_SHA=${{ github.sha }}
          file: ./docker/Dockerfile-cloud-no-tmux
          push: ${{ github.event_name != 'pull_request' }}
          tags: |

.github/workflows/multi-gpu-e2e.yml (26 changed lines)

@@ -4,6 +4,8 @@ on:
   pull_request:
     paths:
       - 'tests/e2e/multigpu/**.py'
+      - 'requirements.txt'
+      - 'setup.py'
       - 'pyproject.toml'
       - '.github/workflows/multi-gpu-e2e.yml'
       - 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
@@ -24,13 +26,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
-            num_gpus: 2
-            nightly_build: "true"
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
@@ -45,6 +40,13 @@ jobs:
             axolotl_extras: fbgemm-gpu
             num_gpus: 2
             nightly_build: "true"
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.9.0
+            axolotl_extras: fbgemm-gpu
+            num_gpus: 2
+            nightly_build: "true"
     runs-on: [self-hosted, modal]
     timeout-minutes: 120
     steps:
@@ -54,17 +56,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-         pip install modal==1.0.2 jinja2 protobuf
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -74,4 +72,4 @@ jobs:
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
      - name: Run tests job on Modal
        run: |
-         modal run -m cicd.multigpu
+         modal run cicd.multigpu
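The "Update env vars" steps above use the standard GitHub Actions mechanism of appending KEY=VALUE lines to the file named by $GITHUB_ENV, which the runner then exports to every later step in the job. A minimal local sketch of that mechanism (using mktemp as a stand-in for the runner-provided file; the tag values are illustrative matrix entries):

```bash
# Stand-in for the file GitHub Actions provides via $GITHUB_ENV
GITHUB_ENV=$(mktemp)

# Same pattern as the workflow steps: append KEY=VALUE lines
echo "BASE_TAG=main-base-py3.11-cu126-2.7.1" >> "$GITHUB_ENV"
echo "PYTORCH_VERSION=2.7.1" >> "$GITHUB_ENV"

# On a runner, these become environment variables in subsequent steps
cat "$GITHUB_ENV"
```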

.github/workflows/nightlies.yml (26 changed lines)

@@ -12,16 +12,16 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
             pytorch: 2.7.1
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.8.0
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -31,7 +31,6 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-           winglian/axolotl
            axolotlai/axolotl
          tags: |
            type=raw,value={{ branch }}-{{ date 'YYYYMMDD' }}
@@ -52,8 +51,6 @@ jobs:
            CUDA=${{ matrix.cuda }}
            PYTORCH_VERSION=${{ matrix.pytorch }}
            AXOLOTL_ARGS=${{ matrix.axolotl_args }}
-           GIT_REF=${{ github.ref }}
-           GIT_SHA=${{ github.sha }}
          file: ./docker/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
@@ -67,16 +64,16 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.6.0
-            axolotl_extras:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
             pytorch: 2.7.1
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.8.0
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -86,7 +83,6 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: |
-           winglian/axolotl-cloud
            axolotlai/axolotl-cloud
          tags: |
            type=raw,value={{ branch }}-{{ date 'YYYYMMDD' }}
@@ -104,8 +100,6 @@ jobs:
          build-args: |
            BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
            CUDA=${{ matrix.cuda }}
-           GIT_REF=${{ github.ref }}
-           GIT_SHA=${{ github.sha }}
          file: ./docker/Dockerfile-cloud
          push: ${{ github.event_name != 'pull_request' }}
          tags: |

.github/workflows/precommit-autoupdate.yml (9 changed lines)

@@ -2,7 +2,7 @@ name: Pre-commit auto-update

 on:
   schedule:
-    - cron: '0 0 * * 0' # Run weekly
+    - cron: '0 0 1 * *' # Run monthly
   workflow_dispatch: # Manual kickoff

 jobs:
@@ -18,15 +18,10 @@ jobs:
        with:
          python-version: '3.11'

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
      - name: Update pre-commit hooks
        id: update
        run: |
-         uv pip install --system pre-commit
+         pip install pre-commit
          pre-commit autoupdate
          if [[ -n $(git status --porcelain) ]]; then
            echo "changes=true" >> $GITHUB_OUTPUT

.github/workflows/preview-docs.yml (9 changed lines)

@@ -40,15 +40,10 @@ jobs:
        with:
          python-version: '3.11'

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
      - name: Install dependencies
        run: |
-         uv pip install --system jupyter quartodoc
-         uv pip install --system -e .
+         python3 -m pip install jupyter quartodoc
+         python3 -m pip install -e .

      - name: Build autodoc
        run: quartodoc build

.github/workflows/pypi.yml (21 changed lines)

@@ -38,24 +38,23 @@ jobs:
        with:
          python-version: "3.11"

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
      - name: Install dependencies
        run: |
-         uv pip install --system wheel packaging==23.2
-         uv pip install --system --no-build-isolation -e ".[dev]"
+         pip3 install wheel packaging==23.2
+         pip3 install --no-build-isolation -e .
+         pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Extract tag name
        id: tag
-       run: echo "TAG_NAME=$(echo "$GITHUB_REF" | cut -d / -f 3)" >> "$GITHUB_OUTPUT"
+       run: echo ::set-output name=TAG_NAME::$(echo $GITHUB_REF | cut -d / -f 3)

-      - name: Build package
+      - name: Update version in setup.py
        run: |
-         uv pip install --system build
-         python -m build
+         sed -i -E 's/version="([0-9.]+)",/version="${{ steps.tag.outputs.TAG_NAME }}",/g' setup.py
+
+      - name: Build a source dist
+        run: |
+          python setup.py sdist

      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
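The head branch's release flow extracts the tag name from $GITHUB_REF and rewrites the version= field in setup.py before building the sdist. A local dry run of those two steps might look like the following sketch (the ref value and the demo-setup.py file are illustrative stand-ins):

```bash
# In CI, GITHUB_REF is supplied by Actions, e.g. for a tag push:
GITHUB_REF="refs/tags/v0.9.0"                     # illustrative value
TAG_NAME=$(echo "$GITHUB_REF" | cut -d / -f 3)    # -> v0.9.0

# Stand-in for setup.py with a pinned version= field
printf 'setup(\n    version="0.4.1",\n)\n' > demo-setup.py

# Same rewrite the workflow applies before building the sdist
sed -i -E "s/version=\"([0-9.]+)\",/version=\"${TAG_NAME}\",/g" demo-setup.py
cat demo-setup.py   # the version line now reads: version="v0.9.0",
```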

.github/workflows/tests-nightly.yml (62 changed lines)

@@ -13,6 +13,7 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+         cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
          SKIP: no-commit-to-branch
@@ -25,7 +26,7 @@ jobs:
      max-parallel: 2
      matrix:
        python_version: ["3.11"]
-       pytorch_version: ["2.6.0", "2.7.0"]
+       pytorch_version: ["2.7.1", "2.8.0"]
    timeout-minutes: 20

    steps:
@@ -42,30 +43,32 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
+         cache: 'pip' # caching pip dependencies

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
+      - name: upgrade pip
+        run: |
+          pip3 install --upgrade pip
+          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel

      - name: Install PyTorch
        run: |
-         uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
+         pip3 install torch==${{ matrix.pytorch_version }} torchvision

-      - name: Update pyproject.toml for nightly builds
+      - name: Update requirements.txt
        run: |
-         sed -i 's#"transformers==.*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml
-         sed -i 's#"peft==.*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml
-         sed -i 's#"accelerate==.*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml
-         sed -i 's#"trl==.*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml
-         sed -i 's#"datasets==.*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml
+         sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt
+         sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt
+         sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt
+         sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt
+         sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt

      - name: Install dependencies
        run: |
-         uv pip show --system torch
-         uv pip install --system --no-build-isolation -e ".[dev]"
+         pip3 show torch
+         pip3 install --no-build-isolation -U -e .
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
+         pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Make sure PyTorch version wasn't clobbered
        run: |
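The "Update requirements.txt" step above pins each HuggingFace library to its git main branch by rewriting the matching requirement line in place. A self-contained illustration of what one of those sed commands does (using a scratch file with made-up version pins so nothing real is touched):

```bash
# Scratch requirements file; the pinned versions are illustrative
printf 'transformers==4.46.0\npeft==0.13.0\n' > demo-requirements.txt

# Same rewrite the workflow performs: replace the whole line that
# starts with "transformers" with a git+https nightly source
sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' demo-requirements.txt

cat demo-requirements.txt
# transformers @ git+https://github.com/huggingface/transformers.git@main
# peft==0.13.0
```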
@@ -81,6 +84,9 @@ jobs:
          pytest -v --durations=10 tests/patched/
          pytest -v --durations=10 tests/cli/

+      - name: cleanup pip cache
+        run: |
+          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  docker-e2e-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
@@ -96,14 +102,14 @@ jobs:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
-           pytorch: 2.6.0
+           pytorch: 2.7.1
            num_gpus: 1
            axolotl_extras:
            nightly_build: "true"
-         - cuda: 126
-           cuda_version: 12.6.3
+         - cuda: 128
+           cuda_version: 12.8.1
            python_version: "3.11"
-           pytorch: 2.7.1
+           pytorch: 2.8.0
            num_gpus: 1
            axolotl_extras:
            nightly_build: "true"
@@ -114,16 +120,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
-         uv pip install --system modal==1.0.2 jinja2
+         python -m pip install --upgrade pip
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -133,7 +136,7 @@ jobs:
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
      - name: Run tests job on Modal
        run: |
-         modal run -m cicd.e2e_tests
+         modal run cicd.e2e_tests
  docker-e2e-multigpu-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
    # this job needs to be run on self-hosted GPU runners...
@@ -159,16 +162,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
-         uv pip install --system modal==1.0.2 jinja2
+         python -m pip install --upgrade pip
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV

.github/workflows/tests.yml (142 changed lines)

@@ -7,16 +7,18 @@ on:
      - "main"
    paths:
      - '**.py'
-     - 'pyproject.toml'
+     - 'requirements.txt'
      - '.github/workflows/*.yml'
+     - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
      - '**.py'
-     - 'pyproject.toml'
+     - 'requirements.txt'
      - '.github/workflows/*.yml'
+     - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  workflow_dispatch:
@@ -39,6 +41,7 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+         cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
          SKIP: no-commit-to-branch
@@ -52,10 +55,14 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
-       pytorch_version: ["2.6.0", "2.7.1", "2.8.0"]
+       pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
    timeout-minutes: 20

    steps:
+      - name: cleanup node
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+
      - name: Check out repository code
        uses: actions/checkout@v4

@@ -69,25 +76,28 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
+         cache: 'pip' # caching pip dependencies

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
+      - name: upgrade pip
+        run: |
+          pip3 install --upgrade pip
+          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel

      - name: Install PyTorch
        run: |
-         uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
+         pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision

      - name: Install dependencies
        run: |
-         uv pip show --system torch
-         uv pip install --system wheel
-         printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
-         uv pip install --system --no-cache-dir --no-build-isolation -e ".[dev]" --constraints torch-constraints.txt
-         set -o pipefail
-         python scripts/unsloth_install.py | bash
-         python scripts/cutcrossentropy_install.py | bash
+         pip3 show torch
+         pip3 install --no-cache-dir --no-build-isolation -U -e .
+         python scripts/unsloth_install.py | sh
+         python scripts/cutcrossentropy_install.py | sh
+         pip3 install -r requirements-dev.txt -r requirements-tests.txt
+
+      - name: cleanup pip cache
+        run: |
+          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

      - name: Make sure PyTorch version wasn't clobbered
        run: |
@@ -103,10 +113,10 @@ jobs:

      - name: Run tests
        run: |
-         python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
-         python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
-         python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/patched/
-         python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/cli/
+         pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
+         pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
+         pytest -v --durations=10 tests/patched/ --cov=axolotl --cov-append --cov-report=xml
+         pytest -v --durations=10 tests/cli/ --cov=axolotl --cov-append --cov-report=xml

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
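In the rewritten test step, coverage from the separate pytest invocations is accumulated into one report: the first run writes fresh coverage data, and the later runs pass --cov-append so their results merge into it instead of overwriting it. A minimal sketch of the pattern:

```bash
# First suite: starts a fresh coverage data file and writes coverage.xml
pytest --cov=axolotl --cov-report=xml tests/

# Later suites: --cov-append merges their results into the existing data
pytest --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
pytest --cov=axolotl --cov-append --cov-report=xml tests/patched/
```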
@@ -116,7 +126,6 @@ jobs:
          flags: unittests,pytorch-${{ matrix.pytorch_version }}
          fail_ci_if_error: false

-
  pytest-sdist:
    name: PyTest from Source Dist
    runs-on: ubuntu-latest
@@ -125,10 +134,14 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
-       pytorch_version: ["2.6.0", "2.7.1", "2.8.0"]
+       pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
    timeout-minutes: 20

    steps:
+      - name: cleanup node
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
+
      - name: Check out repository code
        uses: actions/checkout@v4

@@ -142,26 +155,29 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
+         cache: 'pip' # caching pip dependencies

-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
+      - name: upgrade pip
+        run: |
+          pip3 install --upgrade pip
+          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel psutil

      - name: Install PyTorch
        run: |
-         uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
+         pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision

      - name: Install dependencies
        run: |
-         uv pip show --system torch
-         uv pip install --system wheel build setuptools_scm
-         python -m build --sdist
-         printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
-         tarball_path=$(echo dist/axolotl*.tar.gz)
-         uv pip install --no-cache-dir --no-build-isolation --system "${tarball_path}[dev]" --constraints torch-constraints.txt
+         pip3 show torch
+         python -m build --no-isolation --sdist
+         pip3 install --no-cache-dir --no-build-isolation dist/axolotl*.tar.gz
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
+         pip3 install -r requirements-dev.txt -r requirements-tests.txt
+
+      - name: cleanup pip cache
+        run: |
+          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

      - name: Make sure PyTorch version wasn't clobbered
        run: |
@@ -176,9 +192,9 @@ jobs:

      - name: Run tests
        run: |
-         python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
-         python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
-         python -m pytest -v --durations=10 -n 8 tests/cli/
+         pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
+         pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
+         pytest -v --durations=10 tests/cli/

  gate-skip-e2e:
    needs: [pre-commit, pytest, pytest-sdist]
@@ -223,19 +239,13 @@ jobs:
      fail-fast: false
      matrix:
        include:
-         - cuda: 126
-           cuda_version: 12.6.3
+         - cuda: 128
+           cuda_version: 12.8.1
            python_version: "3.11"
-           pytorch: 2.7.1
+           pytorch: 2.8.0
            num_gpus: 1
            axolotl_extras:
-         - cuda: 126
-           cuda_version: 12.6.3
-           python_version: "3.11"
-           pytorch: 2.7.1
-           num_gpus: 1
-           axolotl_extras:
-           dockerfile: "Dockerfile.jinja"
+           dockerfile: "Dockerfile-uv.jinja"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -243,17 +253,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-         pip install modal==1.0.2 jinja2 protobuf
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -285,15 +291,15 @@ jobs:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
-           pytorch: 2.6.0
-           num_gpus: 1
-           axolotl_extras:
-         - cuda: 128
-           cuda_version: 12.8.1
-           python_version: "3.11"
            pytorch: 2.7.1
            num_gpus: 1
            axolotl_extras:
+         # - cuda: 128
+         #   cuda_version: 12.8.1
+         #   python_version: "3.11"
+         #   pytorch: 2.7.1
+         #   num_gpus: 1
+         #   axolotl_extras:
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -301,6 +307,12 @@ jobs:
            num_gpus: 1
            gpu_type: "B200"
            axolotl_extras: fbgemm-gpu
+         - cuda: 128
+           cuda_version: 12.8.1
+           python_version: "3.11"
+           pytorch: 2.9.0
+           num_gpus: 1
+           axolotl_extras:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -308,17 +320,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-         pip install modal==1.0.2 jinja2 protobuf
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -355,17 +363,13 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-         pip install modal==1.0.2 jinja2 protobuf
+         pip install modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-         echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV

.gitignore (2 changed lines)

@@ -191,5 +191,5 @@ out/
 # vim
 *.swp

-# setuptools-scm generated version file
+# scm auto-versioning
 src/axolotl/_version.py
@@ -11,13 +11,13 @@ repos:
      - id: no-commit-to-branch
        args: ['--branch', 'main']
  - repo: https://github.com/astral-sh/ruff-pre-commit
-   rev: v0.12.12
+   rev: v0.14.7
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
  - repo: https://github.com/pre-commit/mirrors-mypy
-   rev: v1.17.1
+   rev: v1.19.0
    hooks:
      - id: mypy
        additional_dependencies:
@@ -26,7 +26,7 @@ repos:
          'pydantic>=2.5.3',
        ]
  - repo: https://github.com/PyCQA/bandit
-   rev: 1.8.6
+   rev: 1.9.2
    hooks:
      - id: bandit
        args: [
@@ -1,8 +1,9 @@
 FROM axolotlai/axolotl-cloud:main-py3.11-cu124-2.6.0

 COPY .runpod/requirements.txt /requirements.txt
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
-    /root/.local/bin/uv pip install --system -r /requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install --upgrade pip && \
+    python3 -m pip install --upgrade -r /requirements.txt

 # Environment settings
 ARG BASE_VOLUME="/runpod-volume"
@@ -1,5 +1,6 @@
-include pyproject.toml
+include requirements.txt
 include README.md
 include LICENSE
+include src/setuptools_axolotl_dynamic_dependencies.py
 include src/axolotl/utils/chat_templates/templates/*.jinja
-recursive-include src/axolotl *.py
+recursive-include axolotl *.py
README.md (49 changed lines)

@@ -29,6 +29,10 @@

 ## 🎉 Latest Updates

+- 2025/11: Axolotl now includes support for [Olmo3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/olmo3).
+- 2025/10: New model support has been added in Axolotl for: [Qwen3 Next](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/qwen3-next), [Qwen2.5-vl, Qwen3-vl](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen2_5-vl), [Qwen3, Qwen3MoE](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen3), [Granite 4](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/granite4), [HunYuan](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/hunyuan), [Magistral 2509](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/magistral#vision), [Apertus](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/apertus), and [Seed-OSS](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/seed-oss).
+- 2025/09: Axolotl now has text diffusion training. Read more [here](https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/diffusion).
+- 2025/08: QAT has been updated to include NVFP4 support. See [PR](https://github.com/axolotl-ai-cloud/axolotl/pull/3107).
 - 2025/07:
   - ND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the [blog post](https://huggingface.co/blog/accelerate-nd-parallel) for more info.
   - Axolotl adds more models: [GPT-OSS](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/gpt-oss), [Gemma 3n](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/gemma3n), [Liquid Foundation Model 2 (LFM2)](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/lfm2), and [Arcee Foundation Models (AFM)](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/afm).
@@ -36,12 +40,12 @@
   - [Voxtral](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/voxtral), [Magistral 1.1](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/magistral), and [Devstral](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/devstral) with mistral-common tokenizer support has been integrated in Axolotl!
   - TiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See [examples](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/alst) for using ALST with Axolotl!
 - 2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the [docs](https://docs.axolotl.ai/docs/qat.html) to learn more!
-- 2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the [blog](https://huggingface.co/blog/axolotl-ai-co/long-context-with-sequence-parallelism-in-axolotl) and [docs](https://docs.axolotl.ai/docs/sequence_parallelism.html) to learn how to scale your context length when fine-tuning.

 <details>

 <summary>Expand older updates</summary>

+- 2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the [blog](https://huggingface.co/blog/axolotl-ai-co/long-context-with-sequence-parallelism-in-axolotl) and [docs](https://docs.axolotl.ai/docs/sequence_parallelism.html) to learn how to scale your context length when fine-tuning.
 - 2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See [examples](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/magistral) to start training your own Magistral models with Axolotl!
 - 2025/04: Llama 4 support has been added in Axolotl. See [examples](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/llama-4) to start training your own Llama 4 models with Axolotl's linearized version!
 - 2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the [docs](https://docs.axolotl.ai/docs/multimodal.html) to fine-tune your own!
@@ -65,9 +69,15 @@ Features:
 - **Flexible Dataset Handling**: Load from local, HuggingFace, and cloud (S3, Azure, GCP, OCI) datasets.
 - **Cloud Ready**: We ship [Docker images](https://hub.docker.com/u/axolotlai) and also [PyPI packages](https://pypi.org/project/axolotl/) for use on cloud platforms and local hardware.
|
- **Cloud Ready**: We ship [Docker images](https://hub.docker.com/u/axolotlai) and also [PyPI packages](https://pypi.org/project/axolotl/) for use on cloud platforms and local hardware.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 🚀 Quick Start - LLM Fine-tuning in Minutes
|
## 🚀 Quick Start - LLM Fine-tuning in Minutes
|
||||||
|
|
||||||
**Requirements**: NVIDIA GPU (Ampere+) or AMD GPU, Python 3.11+
|
**Requirements**:
|
||||||
|
|
||||||
|
- NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
|
||||||
|
- Python 3.11
|
||||||
|
- PyTorch ≥2.7.1
|
||||||
|
|
||||||
### Google Colab
|
### Google Colab
|
||||||
|
|
||||||
@@ -75,35 +85,15 @@ Features:
|
|||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
|
|
||||||
#### Project setup (uv add)
|
#### Using pip
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Install uv
|
pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
||||||
|
|
||||||
# Initialize or enter your project
|
|
||||||
uv init my-project && cd my-project
|
|
||||||
uv add axolotl
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
source .venv/bin/activate
|
|
||||||
|
|
||||||
# Download example axolotl configs, deepspeed configs
|
# Download example axolotl configs, deepspeed configs
|
||||||
axolotl fetch examples
|
axolotl fetch examples
|
||||||
axolotl fetch deepspeed_configs # optional
|
axolotl fetch deepspeed_configs # OPTIONAL
|
||||||
```
|
|
||||||
|
|
||||||
#### Quick try (uv pip)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install uv if needed
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
|
|
||||||
uv pip install axolotl
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Download example axolotl configs, deepspeed configs
|
|
||||||
axolotl fetch examples
|
|
||||||
axolotl fetch deepspeed_configs # optional
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Using Docker
|
#### Using Docker
|
||||||
@@ -168,6 +158,13 @@ That's it! Check out our [Getting Started Guide](https://docs.axolotl.ai/docs/ge
|
|||||||
|
|
||||||
Contributions are welcome! Please see our [Contributing Guide](https://github.com/axolotl-ai-cloud/axolotl/blob/main/.github/CONTRIBUTING.md) for details.
|
Contributions are welcome! Please see our [Contributing Guide](https://github.com/axolotl-ai-cloud/axolotl/blob/main/.github/CONTRIBUTING.md) for details.
|
||||||
|
|
||||||
|
## 📈 Telemetry
|
||||||
|
|
||||||
|
Axolotl has opt-out telemetry that helps us understand how the project is being used
|
||||||
|
and prioritize improvements. We collect basic system information, model types, and
|
||||||
|
error rates—never personal data or file paths. Telemetry is enabled by default. To
|
||||||
|
disable it, set AXOLOTL_DO_NOT_TRACK=1. For more details, see our [telemetry documentation](https://docs.axolotl.ai/docs/telemetry.html).
|
||||||
|
|
||||||
## ❤️ Sponsors
|
## ❤️ Sponsors
|
||||||
|
|
||||||
Interested in sponsoring? Contact us at [wing@axolotl.ai](mailto:wing@axolotl.ai)
|
Interested in sponsoring? Contact us at [wing@axolotl.ai](mailto:wing@axolotl.ai)
|
||||||
|
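Taken together, the new quick-start and telemetry sections reduce to a short shell session. A minimal sketch (the example config path is illustrative; any config fetched by `axolotl fetch examples` works):

```bash
pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]

axolotl fetch examples
export AXOLOTL_DO_NOT_TRACK=1   # opt out of telemetry, per the section above
axolotl train examples/llama-3/lora-1b.yml
```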
@@ -241,6 +241,7 @@ website:
       - docs/installation.qmd
       - docs/inference.qmd
       - docs/cli.qmd
+      - docs/telemetry.qmd
       - docs/config-reference.qmd
       - text: "API Reference"
         href: docs/api
cicd/Dockerfile-uv.jinja (new file, 53 lines)

@@ -0,0 +1,53 @@
+FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}
+
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
+ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
+ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
+ENV CUDA="{{ CUDA }}"
+ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
+ENV GITHUB_REF="{{ GITHUB_REF }}"
+ENV GITHUB_SHA="{{ GITHUB_SHA }}"
+ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
+ENV HF_HOME="{{ HF_HOME }}"
+
+RUN apt-get update && \
+    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
+
+WORKDIR /workspace
+
+RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git
+
+WORKDIR /workspace/axolotl
+
+RUN git fetch origin +$GITHUB_REF && \
+    git checkout FETCH_HEAD
+
+# If AXOLOTL_EXTRAS is set, append it in brackets
+RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
+        sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
+        sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
+        sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
+        sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
+        sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
+    fi
+
+RUN uv pip install packaging==23.2 setuptools==75.8.0
+RUN uv pip install torchvision
+RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
+        uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+    else \
+        uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+    fi
+
+RUN python scripts/unsloth_install.py --uv | sh
+RUN python scripts/cutcrossentropy_install.py --uv | sh
+
+# So we can test the Docker image
+RUN uv pip install -r requirements-dev.txt -r requirements-tests.txt
+
+# fix so that git fetch/pull from remote works
+RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
+    git config --get remote.origin.fetch
+
+# helper for huggingface-login cli
+RUN git config --global credential.helper store
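Because this Dockerfile is a Jinja template, it has to be rendered before `docker build` can consume it. A minimal sketch, assuming `jinja2` is installed locally; all variable values below are placeholders (the `BASE_TAG` default mirrors the CI default shown further down):

```bash
python -c "
import jinja2, pathlib
tpl = jinja2.Template(pathlib.Path('cicd/Dockerfile-uv.jinja').read_text())
print(tpl.render(BASE_TAG='main-base-uv-py3.11-cu126-2.6.0', AXOLOTL_EXTRAS='',
                 AXOLOTL_ARGS='', CUDA='126', PYTORCH_VERSION='2.6.0',
                 GITHUB_REF='refs/heads/main', GITHUB_SHA='', NIGHTLY_BUILD='',
                 HF_HOME='/workspace/hf-cache'))
" > Dockerfile.rendered
docker build -f Dockerfile.rendered -t axolotl-ci-uv .
```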
@@ -1,10 +1,6 @@
-FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}
+FROM axolotlai/axolotl-base:{{ BASE_TAG }}

-SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]
-
-ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"
-
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
 ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
 ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
 ENV CUDA="{{ CUDA }}"

@@ -13,7 +9,7 @@ ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
 ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
 ENV HF_HOME="{{ HF_HOME }}"
-ENV VENV_PYTHON=$VENV_PYTHON
+ENV AXOLOTL_DATASET_NUM_PROC="8"

 RUN apt-get update && \
     apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm

@@ -29,27 +25,25 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
-        sed -i 's#"transformers[^"]*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml; \
-        sed -i 's#"peft[^"]*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml; \
-        sed -i 's#"accelerate[^"]*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml; \
-        sed -i 's#"trl[^"]*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml; \
-        sed -i 's#"datasets[^"]*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml; \
+        sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
+        sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
+        sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
+        sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
+        sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
     fi

-RUN uv pip install --python "$VENV_PYTHON" packaging==23.2 setuptools==75.8.0 pip
+RUN pip install packaging==23.2 setuptools==75.8.0 psutil
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,${AXOLOTL_EXTRAS}] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
     fi

-RUN uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS
-RUN "$VENV_PYTHON" scripts/unsloth_install.py | sh
-RUN "$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh
+RUN python scripts/unsloth_install.py | sh
+RUN python scripts/cutcrossentropy_install.py | sh

 # So we can test the Docker image
-RUN uv pip install --python "$VENV_PYTHON" -e ".[dev]"
+RUN pip install -r requirements-dev.txt -r requirements-tests.txt

 # fix so that git fetch/pull from remote works
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
cicd/cicd.sh (16 lines changed)

@@ -4,7 +4,7 @@ set -e
 python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"

 # Run unit tests with initial coverage report
-uv run pytest -v --durations=10 -n8 \
+pytest -v --durations=10 -n8 \
     --ignore=tests/e2e/ \
     --ignore=tests/patched/ \
     --ignore=tests/cli \

@@ -12,36 +12,36 @@ uv run pytest -v --durations=10 -n8 \
     --cov=axolotl

 # Run lora kernels tests with coverage append
-uv run pytest -v --durations=10 \
+pytest -v --durations=10 \
     /workspace/axolotl/tests/e2e/patched/lora_kernels \
     --cov=axolotl \
     --cov-append

 # Run patched tests excluding lora kernels with coverage append
-uv run pytest --full-trace -vvv --durations=10 \
+pytest --full-trace -vvv --durations=10 \
     --ignore=tests/e2e/patched/lora_kernels \
     /workspace/axolotl/tests/e2e/patched \
     --cov=axolotl \
     --cov-append

 # Run solo tests with coverage append
-uv run pytest -v --durations=10 -n1 \
+pytest -v --durations=10 -n1 \
     /workspace/axolotl/tests/e2e/solo/ \
     --cov=axolotl \
     --cov-append

 # Run integration tests with coverage append
-uv run pytest -v --durations=10 \
+pytest -v --durations=10 \
     /workspace/axolotl/tests/e2e/integrations/ \
     --cov=axolotl \
     --cov-append

-uv run pytest -v --durations=10 /workspace/axolotl/tests/cli \
+pytest -v --durations=10 /workspace/axolotl/tests/cli \
     --cov=axolotl \
     --cov-append

 # Run remaining e2e tests with coverage append and final report
-uv run pytest -v --durations=10 \
+pytest -v --durations=10 \
     --ignore=tests/e2e/solo/ \
     --ignore=tests/e2e/patched/ \
     --ignore=tests/e2e/multigpu/ \

@@ -52,4 +52,4 @@ uv run pytest -v --durations=10 \
     --cov-append \
     --cov-report=xml:e2e-coverage.xml

-uv run codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
+codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
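The script above builds one combined coverage report by chaining `--cov-append` across pytest invocations. A minimal local sketch of the same pattern, assuming `pytest-cov` is installed (test paths are illustrative):

```bash
pytest tests/unit --cov=axolotl                  # first run starts the .coverage data file
pytest tests/cli  --cov=axolotl --cov-append     # later runs append instead of overwriting
pytest tests/e2e  --cov=axolotl --cov-append --cov-report=xml:coverage.xml
```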
@@ -23,7 +23,7 @@ df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
     "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
     "CUDA": os.environ.get("CUDA", "126"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
@@ -23,7 +23,7 @@ df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
     "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
     "CUDA": os.environ.get("CUDA", "126"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),

@@ -65,8 +65,13 @@ def run_cmd(cmd: str, run_folder: str):
     import subprocess  # nosec

     sp_env = os.environ.copy()
-    sp_env["AXOLOTL_DATASET_PROCESSES"] = "8"
+    sp_env["AXOLOTL_DATASET_NUM_PROC"] = "8"

     # Propagate errors from subprocess.
-    if exit_code := subprocess.call(cmd.split(), cwd=run_folder, env=sp_env):  # nosec
-        exit(exit_code)
+    try:
+        exit_code = subprocess.call(cmd.split(), cwd=run_folder, env=sp_env)  # nosec
+        if exit_code:
+            print(f"Command '{cmd}' failed with exit code {exit_code}")
+            return exit_code
+    except Exception as e:  # pylint: disable=broad-except
+        print(f"Command '{cmd}' failed with exception {e}")
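The new `run_cmd` above hands the failing exit code back to the caller instead of terminating the whole process. For reference, a sketch of the same pattern in shell form:

```bash
run_cmd() {
    # run a command, report failures, and hand the exit code back to the caller
    "$@"
    local code=$?
    if [ "$code" -ne 0 ]; then
        echo "Command '$*' failed with exit code $code"
        return "$code"
    fi
}

run_cmd pytest -q || echo "continuing despite failure"
```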
@@ -13,7 +13,7 @@ datasets:
 val_set_size: 0
 output_dir: temp_debug/axolotl_outputs/model
 dataset_prepared_path: temp_debug/axolotl_outputs/data
-dataset_processes: 1
+dataset_num_proc: 1

 sequence_len: 4096
 sample_packing: false
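This config key rename (`dataset_processes` → `dataset_num_proc`) also applies to CLI overrides, so a single-process preprocessing run now looks like this (a sketch; the config path is illustrative):

```bash
axolotl preprocess devtools/dev_chat_template.yml --dataset_num_proc=1
```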
@@ -1,19 +1,13 @@
-ARG BASE_TAG=main-base-uv
-FROM axolotlai/axolotl-base-uv:$BASE_TAG
+ARG BASE_TAG=main-base
+FROM axolotlai/axolotl-base:$BASE_TAG

 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
 ARG PYTORCH_VERSION="2.1.2"
-ARG GIT_REF="refs/heads/main"
-ARG GIT_SHA="HEAD"
-ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"

 ENV PYTORCH_VERSION=$PYTORCH_VERSION
-ENV GIT_REF=$GIT_REF
-ENV GIT_SHA=$GIT_SHA
-ENV VENV_PYTHON=$VENV_PYTHON

 RUN apt-get update && \
     apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev rsync s3fs && \

@@ -26,19 +20,16 @@ RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git

 WORKDIR /workspace/axolotl

-# Ensure we are on the expected commit and break Docker cache between revisions
-RUN git fetch origin "$GIT_REF" && git checkout "$GIT_SHA"

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
     fi && \
-    uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS && \
-    "$VENV_PYTHON" scripts/unsloth_install.py | sh && \
-    "$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh && \
-    uv pip install --python "$VENV_PYTHON" pytest
+    python scripts/unsloth_install.py | sh && \
+    python scripts/cutcrossentropy_install.py | sh && \
+    pip install pytest && \
+    pip cache purge

 # fix so that git fetch/pull from remote works with shallow clone
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
@@ -35,18 +35,24 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"

 WORKDIR /workspace

-RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel && \
+RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel psutil && \
     python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \
-    CAUSAL_CONV1D_FORCE_CXX11_ABI=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE python3 -m pip install --no-cache-dir causal_conv1d==1.5.2 && \
-    python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" && \
     python3 -m pip cache purge

+RUN if [ "$CUDA" != "130" ] ; then \
+        CAUSAL_CONV1D_FORCE_CXX11_ABI=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@v1.5.4"; \
+        python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"; \
+        python3 -m pip cache purge; \
+    fi

 RUN git lfs install --skip-repo && \
     pip3 install awscli && \
     # The base image ships with `pydantic==1.8.2` which is not working
     pip3 install -U --no-cache-dir pydantic==1.10.10 && \
     pip3 cache purge

-RUN if [ "$PYTORCH_VERSION" = "2.6.0" ] && [ "$CUDA" = "124" ] ; then \
-        FLASH_ATTENTION_FORCE_BUILD="TRUE" uv pip install --no-build-isolation flash-attn==2.8.0.post2; \
+RUN if [ "$PYTORCH_VERSION" = "2.9.1" ] && [ "$CUDA" = "128" ] ; then \
+        wget https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.17/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+        pip3 install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+        rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
     fi
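After a base image switches to a prebuilt Flash Attention wheel like the one above, a quick import check confirms the wheel matches the installed torch/CUDA combination (a sketch):

```bash
python3 -c "import torch, flash_attn; print(torch.__version__, torch.version.cuda, flash_attn.__version__)"
```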
@@ -12,8 +12,8 @@ EXPOSE 22
 COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
 COPY scripts/motd /etc/motd

-RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
-    "$VENV_PYTHON" -m jupyter lab clean
+RUN pip install jupyterlab notebook ipywidgets && \
+    jupyter lab clean
 RUN apt update && \
     apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
     rm -rf /var/cache/apt/archives && \
@@ -12,8 +12,8 @@ EXPOSE 22
 COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
 COPY scripts/motd /etc/motd

-RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
-    "$VENV_PYTHON" -m jupyter lab clean
+RUN pip install jupyterlab notebook ipywidgets && \
+    jupyter lab clean
 RUN apt update && \
     apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm && \
     rm -rf /var/cache/apt/archives && \
@@ -24,14 +24,13 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm] $AXOLOTL_ARGS; \
+        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
-    fi && \
-    uv pip install --no-build-isolation flash-attn $AXOLOTL_ARGS
+    fi

 # So we can test the Docker image
-RUN uv pip install pytest
+RUN pip install pytest

 # fix so that git fetch/pull from remote works
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
@@ -13,7 +13,6 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
 ENV PYTHON_VERSION=$PYTHON_VERSION
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
 ENV UV_TORCH_BACKEND="cu${CUDA}"
-ENV VENV_PYTHON=/workspace/axolotl-venv/bin/python

 RUN apt-get update \
     && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \

@@ -30,8 +29,14 @@ RUN uv venv --no-project --relocatable axolotl-venv

 ENV PATH="/workspace/axolotl-venv/bin:${PATH}"

-RUN uv pip install --python "$VENV_PYTHON" packaging setuptools wheel psutil protobuf grpclib \
-    && uv pip install --python "$VENV_PYTHON" torch==${PYTORCH_VERSION} \
-    && uv pip install --python "$VENV_PYTHON" --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
-    && uv pip install --python "$VENV_PYTHON" "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
-    && uv pip install --python "$VENV_PYTHON" awscli pydantic
+RUN uv pip install packaging setuptools wheel psutil \
+    && uv pip install torch==${PYTORCH_VERSION} torchvision \
+    && uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
+    && uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
+    && uv pip install awscli pydantic

+RUN if [ "$PYTORCH_VERSION" = "2.9.0" ] && [ "$CUDA" = "128" ] ; then \
+        wget https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.17/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+        uv pip install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+        rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+    fi
@@ -218,6 +218,13 @@ If you have tool arguments with same name but different dtypes (like `"time": st
 ```
 "arguments": "{\"...\": \"...\"}"
 ```

+The same is applicable for tool parameters.
+
+```
+"parameters": "{\"...\": \"...\"}"
+```
+
 :::

 Example config for Llama4:
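Stringifying nested tool-call fields like this can be done mechanically over a JSONL dataset. A minimal sketch using `jq` (the field layout is illustrative):

```bash
echo '{"arguments": {"time": "12:00", "city": "Berlin"}}' \
  | jq -c '.arguments |= tojson'
# {"arguments":"{\"time\":\"12:00\",\"city\":\"Berlin\"}"}
```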
@@ -29,7 +29,7 @@ While debugging it's helpful to simplify your test scenario as much as possible.
 1. **Make sure you are using the latest version of axolotl**: This project changes often and bugs get fixed fast. Check your git branch and make sure you have pulled the latest changes from `main`.
 1. **Eliminate concurrency**: Restrict the number of processes to 1 for both training and data preprocessing:
     - Set `CUDA_VISIBLE_DEVICES` to a single GPU, ex: `export CUDA_VISIBLE_DEVICES=0`.
-    - Set `dataset_processes: 1` in your axolotl config or run the training command with `--dataset_processes=1`.
+    - Set `dataset_num_proc: 1` in your axolotl config or run the training command with `--dataset_num_proc=1`.
 2. **Use a small dataset**: Construct or use a small dataset from HF Hub. When using a small dataset, you will often have to make sure `sample_packing: False` and `eval_sample_packing: False` to avoid errors. If you are in a pinch and don't have time to construct a small dataset but want to use from the HF Hub, you can shard the data (this will still tokenize the entire dataset, but will only use a fraction of the data for training. For example, to shard the dataset into 20 pieces, add the following to your axolotl config):

 ```yaml

@@ -72,8 +72,8 @@ datasets:
 Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project:

 ```bash
-uv sync --extra deepspeed
-uv pip install flash-attn --no-build-isolation
+pip3 install packaging
+pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
 ```

 #### Remote Hosts

@@ -101,7 +101,7 @@ For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 acceler
             "-m", "axolotl.cli.train", "dev_chat_template.yml",
             // The flags below simplify debugging by overriding the axolotl config
             // with the debugging tips above. Modify as needed.
-            "--dataset_processes=1", // limits data preprocessing to one process
+            "--dataset_num_proc=1", // limits data preprocessing to one process
             "--max_steps=1", // limits training to just one step
             "--batch_size=1", // minimizes batch size
             "--micro_batch_size=1", // minimizes batch size

@@ -213,8 +213,8 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
 You will now be in the container. Next, perform an editable install of Axolotl:

 ```bash
-uv sync --extra deepspeed
-uv pip install flash-attn --no-build-isolation
+pip3 install packaging
+pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
 ```

 ### Attach To Container
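Collapsing those debugging tips into a single command, a typical minimal repro run looks like this (a sketch; the config name is illustrative, and the flags mirror the overrides listed above):

```bash
CUDA_VISIBLE_DEVICES=0 axolotl train dev_chat_template.yml \
    --dataset_num_proc=1 --max_steps=1 --batch_size=1 --micro_batch_size=1
```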
@@ -63,6 +63,14 @@ description: Frequently asked questions

 > A: There seems to be a wheel issue with FA2 2.8.0 on CUDA 12.4. Try CUDA 12.6 instead or downgrade to FA2 2.7.4. Please refer to the upstream issue: https://github.com/Dao-AILab/flash-attention/issues/1717.

+**Q: Can we mix text and text+image datasets for VLM training?**
+
+> A: Yes, you can for newer VLM arch. The ones that would not work are LLaVA / Pixtral arch. If you notice one not working, please let us know!
+
+**Q: Why is `memory/max_*` different from `nvidia-smi`?**
+
+> A: We use `torch` APIs to retrieve this information. You can see https://docs.pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management for more information.
+
 ### Chat templates

 **Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
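The `memory/max_*` answer above refers to PyTorch's own allocator counters, which track tensors rather than the whole process. A quick way to see the difference on a CUDA machine (a sketch):

```bash
python -c "
import torch
x = torch.zeros(1024, 1024, 256, device='cuda')  # ~1 GiB of float32
print('allocator max:', torch.cuda.max_memory_allocated() / 2**30, 'GiB')
" && nvidia-smi --query-gpu=memory.used --format=csv  # also counts CUDA context overhead
```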
@@ -29,40 +29,19 @@ Follow the instructions at: [https://pytorch.org/get-started/locally/](https://p
 For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
 :::

-### uv Installation (Recommended) {#sec-uv-quick}
+### PyPI Installation (Recommended) {#sec-pypi}

 ```{.bash}
-# Install uv if not already installed
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-# Add Axolotl to a project (recommended)
-uv init my-project && cd my-project
-uv add axolotl
-uv pip install flash-attn --no-build-isolation
-source .venv/bin/activate
-```
-
-For a quick one-off install without creating a project:
-
-```{.bash}
-uv pip install axolotl
-uv pip install flash-attn --no-build-isolation
-```
-
-### pip Installation {#sec-pypi}
-
-```{.bash}
-pip install --no-build-isolation axolotl[deepspeed]
-pip install --no-build-isolation flash-attn
+pip3 install -U packaging setuptools wheel ninja
+pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
 ```

 We use `--no-build-isolation` in order to detect the installed PyTorch version (if
 installed) in order not to clobber it, and so that we set the correct version of
 dependencies that are specific to the PyTorch version or other installed
-co-dependencies. Flash Attention is resolved separately so it can be built against
-the environment configured by the previous step.
+co-dependencies.

-### Advanced uv Installation {#sec-uv}
+### uv Installation {#sec-uv}

 uv is a fast, reliable Python package installer and resolver built in Rust. It offers significant performance improvements over pip and provides better dependency resolution, making it an excellent choice for complex environments.

@@ -83,38 +62,28 @@ source .venv/bin/activate
 Install PyTorch
 - PyTorch 2.6.0 recommended
 ```{.bash}
+uv pip install packaging setuptools wheel
 uv pip install torch==2.6.0
 uv pip install awscli pydantic
 ```

 Install axolotl from PyPi
 ```{.bash}
-uv pip install --no-build-isolation axolotl[deepspeed]
-# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO
-# uv pip install --no-build-isolation axolotl[deepspeed,vllm]
-
-uv pip install flash-attn --no-build-isolation
+uv pip install --no-build-isolation axolotl[deepspeed,flash-attn]
+# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO
+uv pip install --no-build-isolation axolotl[deepspeed,flash-attn,vllm]
 ```

 ### Edge/Development Build {#sec-edge-build}

 For the latest features between releases:

-#### Using uv (recommended)
 ```{.bash}
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
-curl -LsSf https://astral.sh/uv/install.sh | sh # If not already installed
-uv sync
-uv pip install flash-attn --no-build-isolation
-```
-
-#### Using pip
-```{.bash}
-git clone https://github.com/axolotl-ai-cloud/axolotl.git
-cd axolotl
-pip install --no-build-isolation -e '.[deepspeed]'
-pip install --no-build-isolation flash-attn
+pip3 install -U packaging setuptools wheel ninja
+pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
 ```

 ### Docker {#sec-docker}

@@ -172,7 +141,7 @@ For providers supporting Docker:
 ### macOS {#sec-macos}

 ```{.bash}
-uv pip install --no-build-isolation -e '.'
+pip3 install --no-build-isolation -e '.'
 ```

 See @sec-troubleshooting for Mac-specific issues.

@@ -190,15 +159,10 @@ We recommend using WSL2 (Windows Subsystem for Linux) or Docker.
 1. Install Python ≥3.11
 2. Install PyTorch: https://pytorch.org/get-started/locally/
 3. Install Axolotl:
 ```{.bash}
-# Option A: add Axolotl to the environment
-uv add axolotl
-uv pip install flash-attn --no-build-isolation
-
-# Option B: quick install
-uv pip install axolotl
-uv pip install flash-attn --no-build-isolation
-```
+pip3 install -U packaging setuptools wheel ninja
+pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+```
 4. (Optional) Login to Hugging Face:
 ```{.bash}
 huggingface-cli login
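The `--no-build-isolation` rationale above is easy to verify: the resolver should leave a pre-installed torch untouched. A quick check (a sketch):

```bash
python3 -c "import torch; print('before:', torch.__version__)"
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
python3 -c "import torch, axolotl; print('after:', torch.__version__)"  # same version expected
```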
@@ -27,3 +27,9 @@ learning_rate: 2e-5
 In this example, we have a default learning rate of 2e-5 across the entire model, but we have a separate learning rate
 of 1e-6 for all the self attention `o_proj` modules across all layers, and a learning rate of 1e-5 for the 3rd layer's
 self attention `q_proj` module.
+
+::: {.callout-note}
+
+We currently only support varying `lr` for now. If you're interested in adding support for others (`weight_decay`), we welcome PRs. See https://github.com/axolotl-ai-cloud/axolotl/blob/613bcf90e58f3ab81d3827e7fc572319908db9fb/src/axolotl/core/trainers/mixins/optimizer.py#L17
+
+:::
@@ -4,7 +4,7 @@ format:
   html:
     toc: true
     toc-depth: 3
-    number-sections: true
+    # number-sections: true
     code-tools: true
 execute:
   enabled: false

@@ -14,12 +14,18 @@ This guide covers advanced training configurations for multi-GPU setups using Ax

 ## Overview {#sec-overview}

-Axolotl supports several methods for multi-GPU training:
+When training on multiple GPUs, Axolotl supports 3 sharding/parallelism strategies. Additionally, you can layer specific optimization features on top of that strategy.

-- DeepSpeed (recommended)
-- FSDP (Fully Sharded Data Parallel)
-- Sequence parallelism
-- FSDP + QLoRA
+You generally cannot combine these strategies; they are mutually exclusive.
+
+1. **DeepSpeed**: Powerful optimization library, supports ZeRO stages 1-3.
+2. **FSDP (Fully Sharded Data Parallel)**: PyTorch's native sharding implementation (Recommended).
+3. **DDP (Distributed Data Parallel)**: PyTorch's native parallelism implementation (Default if neither of the above are selected).
+
+These features can often be combined with the strategies above:
+
+* **Sequence Parallelism**: Splits long sequences across GPUs (Compatible with DDP, DeepSpeed, and FSDP).
+* **FSDP + QLoRA**: Combines 4-bit quantization with FSDP (Specific to FSDP).

 ## DeepSpeed {#sec-deepspeed}

@@ -65,12 +71,18 @@ Start from Stage 1 -> Stage 2 -> Stage 3.

 ## Fully Sharded Data Parallel (FSDP) {#sec-fsdp}

+FSDP allows you to shard model parameters, gradients, and optimizer states across data parallel workers.
+
 ::: {.callout-note}

 FSDP2 is recommended for new users. FSDP1 is deprecated and will be removed in an upcoming release of Axolotl.

 :::

+### FSDP + QLoRA {#sec-fsdp-qlora}
+
+For combining FSDP with QLoRA, see our [dedicated guide](fsdp_qlora.qmd).
+
 ### Migrating from FSDP1 to FSDP2 {#sec-migrate-fsdp1-fsdp2}

 To migrate your config from FSDP1 to FSDP2, you must use the `fsdp_version` top-level config field to specify the FSDP version, and

@@ -88,6 +100,7 @@ fsdp_sync_module_states | **REMOVED**
 fsdp_cpu_ram_efficient_loading | cpu_ram_efficient_loading
 fsdp_state_dict_type | state_dict_type
 fsdp_use_orig_params | **REMOVED**
+fsdp_activation_checkpointing | activation_checkpointing

 For more details, please see the migration guide in the [torchtitan repo](https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md). In Axolotl,
 if you were using the following FSDP1 config:

@@ -144,10 +157,6 @@ single sequence causes OOM errors during model training.

 See our [dedicated guide](sequence_parallelism.qmd) for more information.

-### FSDP + QLoRA {#sec-fsdp-qlora}
-
-For combining FSDP with QLoRA, see our [dedicated guide](fsdp_qlora.qmd).
-
 ## Performance Optimization {#sec-performance}

 ### Liger Kernel Integration {#sec-liger}
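The strategy choice in the rewritten overview ultimately comes down to one config field per path. A sketch of selecting each from the CLI (config names are illustrative; the flags mirror the corresponding config keys):

```bash
# DeepSpeed ZeRO-2
axolotl train config.yml --deepspeed=deepspeed_configs/zero2.json

# FSDP2 (fsdp_version and fsdp_config set inside the config file)
axolotl train fsdp2_config.yml

# neither deepspeed nor fsdp_config set -> plain DDP
axolotl train config.yml
```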
@@ -56,10 +56,14 @@ image_resize_algorithm: bilinear

 Please see [examples](https://github.com/axolotl-ai/axolotl/tree/main/examples) folder for full configs.

-::: {.callout-warning}
+::: {.callout-tip}
 Some of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.
 :::

+::: {.callout-note}
+As of now, we do not truncate nor drop samples based on `sequence_len` as each arch has different ways to process non-text tokens. We are looking for help on this.
+:::
+
 ### Mllama {#sec-mllama}

 ```yaml

@@ -95,7 +99,7 @@ chat_template: llava
 ### Mistral-Small-3.1 {#sec-mistral-small-31}

 ::: {.callout-tip}
-Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
+Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
 :::

 ```yaml

@@ -105,7 +109,7 @@ base_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503
 ### Magistral-Small-2509 {#sec-magistral-small-2509}

 ::: {.callout-tip}
-Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
+Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
 :::

 ```yaml

@@ -115,11 +119,13 @@ base_model: mistralai/Magistral-Small-2509
 ### Voxtral {#sec-voxtral}

 ::: {.callout-tip}
-Please make sure to install audio lib via `uv pip install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
+Please make sure to install audio lib via `pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
 :::

 ```yaml
 base_model: mistralai/Voxtral-Mini-3B-2507

+processor_type: VoxtralProcessor
 ```

 ### Gemma-3 {#sec-gemma-3}

@@ -143,7 +149,7 @@ The model's initial loss and grad norm will be very high. We suspect this to be
 :::

 ::: {.callout-tip}
-Please make sure to install `timm` via `uv pip install timm==1.0.17`
+Please make sure to install `timm` via `pip3 install timm==1.0.17`
 :::

 ```yaml

@@ -168,10 +174,18 @@ base_model: Qwen/Qwen2.5-VL-7B-Instruct
 chat_template: qwen2_vl # same as qwen2-vl
 ```

+### Qwen3-VL {#sec-qwen3-vl}
+
+```yaml
+base_model: Qwen/Qwen3-VL-4B-Instruct
+
+chat_template: qwen2_vl # same as qwen2-vl
+```
+
 ### SmolVLM2 {#sec-smolvlm2}

 ::: {.callout-tip}
-Please make sure to install `num2words` via `uv pip install num2words==0.5.14`
+Please make sure to install `num2words` via `pip3 install num2words==0.5.14`
 :::

 ```yaml

@@ -181,7 +195,7 @@ base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
 ### LFM2-VL {#sec-lfm2-vl}

 ::: {.callout-warning}
-Please uninstall `causal-conv1d` via `uv pip uninstall -y causal-conv1d`
+Please uninstall `causal-conv1d` via `pip3 uninstall -y causal-conv1d`
 :::

 ```yaml

@@ -222,7 +236,7 @@ For audio loading, you can use the following keys within `content` alongside `"t

 ::: {.callout-tip}

-You may need to install `librosa` via `uv pip install librosa==0.11.0`.
+You may need to install `librosa` via `pip3 install librosa==0.11.0`.

 :::
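The per-model callouts above each pin one extra dependency; for a workstation that touches several of these VLMs, the pins quoted above can be installed up front (a sketch grouping them by model family):

```bash
# vision-capable Mistral models (Mistral-Small-3.1, Magistral)
pip3 install 'mistral-common[opencv]==1.8.5'
# Voxtral audio support
pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'
# Gemma-3 and SmolVLM2 helpers
pip3 install timm==1.0.17 num2words==0.5.14
```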
|||||||
125
docs/rlhf.qmd
125
docs/rlhf.qmd
@@ -219,6 +219,21 @@ DPO supports the following types with the following dataset format:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### chat_template.argilla_chat
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"chosen": [
|
||||||
|
{"role": "user", "content": "..."},
|
||||||
|
{"role": "assistant", "content": "..."}
|
||||||
|
],
|
||||||
|
"rejected": [
|
||||||
|
{"role": "user", "content": "..."},
|
||||||
|
{"role": "assistant", "content": "..."}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
#### chat_template.default
|
#### chat_template.default
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -582,6 +597,116 @@ To see other examples of custom reward functions, please see [TRL GRPO Docs](htt
|
|||||||
|
|
||||||
To see all configs, please see [TRLConfig](https://github.com/axolotl-ai-cloud/axolotl/blob/v0.9.2/src/axolotl/utils/schemas/trl.py).
|
To see all configs, please see [TRLConfig](https://github.com/axolotl-ai-cloud/axolotl/blob/v0.9.2/src/axolotl/utils/schemas/trl.py).
|
||||||
|
|
||||||
|
#### OpenEnv Rollout Functions
|
||||||
|
|
||||||
|
GRPO supports custom rollout functions for OpenEnv-style environments, enabling interactive tasks like web browsing, code execution, or tool use. This allows you to implement custom generation logic that interacts with external environments.
|
||||||
|
|
||||||
|
For example, to implement a simple math-solving environment with step-by-step verification:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# math_env.py
|
||||||
|
import re
|
||||||
|
|
||||||
|
def math_solver_rollout(model, processing_class, prompts, generation_config=None):
|
||||||
|
"""
|
||||||
|
Custom rollout function that generates step-by-step math solutions.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model: The language model
|
||||||
|
processing_class: The tokenizer/processing_class
|
||||||
|
prompts: List of prompt dicts (with 'messages' key for chat format)
|
||||||
|
generation_config: Optional generation configuration
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of completion strings
|
||||||
|
"""
|
||||||
|
completions = []
|
||||||
|
|
||||||
|
for prompt in prompts:
|
||||||
|
# Apply chat template to prompt
|
||||||
|
messages = prompt.get("messages", [])
|
||||||
|
formatted_prompt = processing_class.apply_chat_template(
|
||||||
|
messages, tokenize=False, add_generation_prompt=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate step-by-step solution
|
||||||
|
full_response = ""
|
||||||
|
for step in range(5): # Max 5 reasoning steps
|
||||||
|
current_input = formatted_prompt + full_response + "\nNext step:"
|
||||||
|
inputs = processing_class(current_input, return_tensors="pt").to(model.device)
|
||||||
|
|
||||||
|
outputs = model.generate(
|
||||||
|
**inputs,
|
||||||
|
max_new_tokens=100,
|
||||||
|
generation_config=generation_config,
|
||||||
|
)
|
||||||
|
step_text = processing_class.decode(
|
||||||
|
outputs[0][inputs.input_ids.shape[1]:],
|
||||||
|
skip_special_tokens=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if solution is complete
|
||||||
|
if "FINAL ANSWER:" in step_text:
|
||||||
|
full_response += step_text
|
||||||
|
break
|
||||||
|
full_response += step_text + "\n"
|
||||||
|
|
||||||
|
completions.append(full_response)
|
||||||
|
|
||||||
|
return completions
|
||||||
|
|
||||||
|
def math_reward(prompts, completions, answers, **kwargs):
|
||||||
|
"""Reward function that checks mathematical correctness"""
|
||||||
|
rewards = []
|
||||||
|
for completion, correct_answer in zip(completions, answers):
|
||||||
|
# Extract predicted answer
|
||||||
|
match = re.search(r"FINAL ANSWER:\s*(.+)", completion)
|
||||||
|
predicted = match.group(1).strip() if match else ""
|
||||||
|
|
||||||
|
# Compare with correct answer
|
||||||
|
reward = 1.0 if predicted == str(correct_answer) else 0.0
|
||||||
|
rewards.append(reward)
|
||||||
|
|
||||||
|
return rewards
|
||||||
|
|
||||||
|
def math_transform(cfg, *args, **kwargs):
|
||||||
|
"""Transform dataset to GRPO format with answer field"""
|
||||||
|
def transform_fn(example, processing_class=None):
|
||||||
|
return {
|
||||||
|
"prompt": [{"role": "user", "content": example["question"]}],
|
||||||
|
"answer": str(example["answer"]),
|
||||||
|
}
|
||||||
|
return transform_fn, {"remove_columns": ["question"]}
|
||||||
|
```
|
||||||
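Before wiring these into a training run, it helps to sanity-check the reward function in isolation. A quick check against the `math_reward` defined above:

```python
# Quick local check of math_reward from math_env.py above.
from math_env import math_reward

completion = "Step 1: 6 * 7 = 42\nFINAL ANSWER: 42"
print(math_reward(prompts=[], completions=[completion], answers=[42]))  # -> [1.0]
```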
|
|
||||||
|
```yaml
|
||||||
|
rl: grpo
|
||||||
|
|
||||||
|
trl:
|
||||||
|
beta: 0.001
|
||||||
|
max_completion_length: 512
|
||||||
|
num_generations: 4
|
||||||
|
rollout_func: "math_env.math_solver_rollout" # Custom rollout function
|
||||||
|
reward_funcs: ["math_env.math_reward"]
|
||||||
|
reward_weights: [1.0]
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: openai/gsm8k
|
||||||
|
name: main
|
||||||
|
type: math_env.math_transform
|
||||||
|
```
|
||||||
|
|
||||||
|
The `rollout_func` parameter accepts a fully qualified name (e.g., `module_name.function_name`) that points to a callable defined in a module in your local directory. The function receives:
|
||||||
|
|
||||||
|
- `model`: The language model
|
||||||
|
- `processing_class`: The tokenizer/processing class
|
||||||
|
- `prompts`: List of prompt dictionaries
|
||||||
|
- `generation_config` (optional): Generation configuration
|
||||||
|
|
||||||
|
It should return a list of completion strings.
|
||||||
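Assuming `math_env.py` sits in your working directory (so the module can be imported), training is then launched as usual; the config filename below is illustrative:

```bash
axolotl train grpo_math.yaml
```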
|
|
||||||
|
For more OpenEnv examples, see [TRL OpenEnv Documentation](https://huggingface.co/docs/trl/main/en/openenv).
|
||||||
|
|
||||||
#### GRPO with DAPO/Dr. GRPO loss
|
#### GRPO with DAPO/Dr. GRPO loss
|
||||||
|
|
||||||
The DAPO paper, and subsequently the Dr. GRPO paper, proposed an alternative loss function for GRPO to remedy the penalty on longer responses.
|
The DAPO paper, and subsequently the Dr. GRPO paper, proposed an alternative loss function for GRPO to remedy the penalty on longer responses.
|
||||||
|
|||||||
@@ -49,9 +49,9 @@ When sequence parallelism is enabled:
|
|||||||
To use sequence parallelism, you need:
|
To use sequence parallelism, you need:
|
||||||
|
|
||||||
- Multiple GPUs (at least 2)
|
- Multiple GPUs (at least 2)
|
||||||
- The `ring-flash-attn` package. Install with either `uv sync --extra ring-flash-attn`
|
- The `ring-flash-attn` package. Install with:
|
||||||
(from a cloned repository) or `uv pip install ring-flash-attn>=0.1.4`.
|
- `pip install axolotl[ring-flash-attn]` (preferred)
|
||||||
- Flash Attention installed separately with `uv pip install flash-attn --no-build-isolation`.
|
- `pip install ring-flash-attn>=0.1.4`
|
||||||
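With the requirements above in place, sequence parallelism is enabled purely through the config. A minimal sketch, assuming 4 GPUs and that `sequence_parallel_degree` is set to the number of GPUs to shard each sequence across:

```yaml
sequence_parallel_degree: 4  # must evenly divide the number of GPUs
flash_attention: true        # ring-flash-attn builds on Flash Attention
micro_batch_size: 1
```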
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|||||||
61
docs/telemetry.qmd
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
---
|
||||||
|
title: Telemetry
|
||||||
|
description: A description of the telemetry implementation in Axolotl.
|
||||||
|
---
|
||||||
|
|
||||||
|
# Telemetry in Axolotl
|
||||||
|
|
||||||
|
Axolotl implements anonymous telemetry to help maintainers understand how the library
|
||||||
|
is used and where users encounter issues. This data helps prioritize features, optimize
|
||||||
|
performance, and fix bugs.
|
||||||
|
|
||||||
|
## Data Collection
|
||||||
|
|
||||||
|
We collect:
|
||||||
|
|
||||||
|
- System info: OS, Python version, Axolotl version, PyTorch version, Transformers
|
||||||
|
version, etc.
|
||||||
|
- Hardware info: CPU count, memory, GPU count and models
|
||||||
|
- Runtime metrics: Training progress, memory usage, timing information
|
||||||
|
- Usage patterns: Models (from a whitelist) and configurations used
|
||||||
|
- Error tracking: Stack traces and error messages (sanitized to remove personal
|
||||||
|
information)
|
||||||
|
|
||||||
|
Personally identifiable information (PII) is not collected.
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
Telemetry is implemented using PostHog and consists of:
|
||||||
|
|
||||||
|
- `axolotl.telemetry.TelemetryManager`: A singleton class that initializes the
|
||||||
|
telemetry system and provides methods for tracking events.
|
||||||
|
- `axolotl.telemetry.errors.send_errors`: A decorator that captures exceptions and
|
||||||
|
sends sanitized stack traces.
|
||||||
|
- `axolotl.telemetry.runtime_metrics.RuntimeMetricsTracker`: A class that tracks
|
||||||
|
runtime metrics during training.
|
||||||
|
- `axolotl.telemetry.callbacks.TelemetryCallback`: A Trainer callback that sends
|
||||||
|
runtime metrics telemetry.
|
||||||
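As a rough illustration of how the error decorator is applied (the wrapped function here is hypothetical; only the import path is taken from the list above):

```python
from axolotl.telemetry.errors import send_errors

@send_errors
def do_train(cfg):  # hypothetical entry point
    ...  # exceptions raised here are captured and a sanitized trace is sent
```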
|
|
||||||
|
The telemetry system will block training startup for 10 seconds to ensure users are
|
||||||
|
aware of data collection, unless telemetry is explicitly enabled or disabled.
|
||||||
|
|
||||||
|
## Opt-Out Mechanism
|
||||||
|
|
||||||
|
Telemetry is **enabled by default** on an opt-out basis. To disable it, set
|
||||||
|
`AXOLOTL_DO_NOT_TRACK=1` or `DO_NOT_TRACK=1`.
|
||||||
|
|
||||||
|
A warning message is logged at startup to clearly inform users about telemetry.
|
||||||
|
We plan to remove this warning after a transition period.
|
||||||
|
|
||||||
|
To hide the telemetry warning that is displayed at startup of `train` and other commands,
|
||||||
|
explicitly set: `AXOLOTL_DO_NOT_TRACK=0` (enable telemetry) or `AXOLOTL_DO_NOT_TRACK=1`
|
||||||
|
(explicitly disable telemetry).
|
||||||
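For example, in your shell before launching any Axolotl command:

```bash
export AXOLOTL_DO_NOT_TRACK=1  # explicitly disable telemetry (also hides the warning)
# or
export AXOLOTL_DO_NOT_TRACK=0  # explicitly enable telemetry (also hides the warning)
```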
|
|
||||||
|
## Privacy
|
||||||
|
|
||||||
|
- All path-like config information is automatically redacted from telemetry data
|
||||||
|
- Model information is only collected for whitelisted organizations
|
||||||
|
- See `axolotl/telemetry/whitelist.yaml` for the set of whitelisted organizations
|
||||||
|
- Each run generates a unique anonymous ID
|
||||||
|
- This allows us to link the different telemetry events within a single training run
|
||||||
|
- Telemetry is only sent from the main process to avoid duplicate events
|
||||||
@@ -6,20 +6,17 @@ LFM2 features a new hybrid Liquid architecture with multiplicative gates, short-
|
|||||||
|
|
||||||
This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
||||||
|
|
||||||
|
Thanks to the team at LiquidAI for giving us early access to prepare for these releases.
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
||||||
|
|
||||||
Here is an example of how to install with pip:
|
Here is an example of how to install with pip:
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have a compatible version of PyTorch installed
|
# Ensure you have a compatible version of Pytorch installed
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging setuptools wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Run one of the finetuning examples below.
|
2. Run one of the finetuning examples below.
|
||||||
@@ -36,11 +33,19 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
|||||||
axolotl train examples/LiquidAI/lfm2-vl-lora.yaml
|
axolotl train examples/LiquidAI/lfm2-vl-lora.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**LFM2-MoE**
|
||||||
|
```bash
|
||||||
|
pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6
|
||||||
|
|
||||||
|
# LoRA SFT (1x48GB @ 16.2GiB)
|
||||||
|
axolotl train examples/LiquidAI/lfm2-8b-a1b-lora.yaml
|
||||||
|
```
|
||||||
|
|
||||||
### TIPS
|
### TIPS
|
||||||
|
|
||||||
- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
|
- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
|
||||||
```bash
|
```bash
|
||||||
uv pip uninstall -y causal-conv1d
|
pip uninstall -y causal-conv1d
|
||||||
```
|
```
|
||||||
|
|
||||||
- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
|
- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||||
@@ -50,14 +55,13 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
|||||||
|
|
||||||
## Optimization Guides
|
## Optimization Guides
|
||||||
|
|
||||||
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
- [Optimizations Guide](https://docs.axolotl.ai/docs/optimizations.html)
|
||||||
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
|
||||||
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
|
||||||
|
|
||||||
## Related Resources
|
## Related Resources
|
||||||
|
|
||||||
- [LFM2 Blog](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models)
|
- [LFM2 Blog](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models)
|
||||||
- [LFM2-VL Blog](https://www.liquid.ai/blog/lfm2-vl-efficient-vision-language-models)
|
- [LFM2-VL Blog](https://www.liquid.ai/blog/lfm2-vl-efficient-vision-language-models)
|
||||||
|
- [LFM2-MoE Blog](https://www.liquid.ai/blog/lfm2-8b-a1b-an-efficient-on-device-mixture-of-experts)
|
||||||
- [Axolotl Docs](https://docs.axolotl.ai)
|
- [Axolotl Docs](https://docs.axolotl.ai)
|
||||||
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
|
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
|
||||||
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
|
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
base_model: LiquidAI/LFM2-350M
|
base_model: LiquidAI/LFM2-350M
|
||||||
|
|
||||||
chunked_cross_entropy: true
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
eot_tokens:
|
eot_tokens:
|
||||||
- "<|im_end|>"
|
- "<|im_end|>"
|
||||||
|
|||||||
59
examples/LiquidAI/lfm2-8b-a1b-lora.yaml
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
base_model: LiquidAI/LFM2-8B-A1B
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
|
load_in_8bit: true
|
||||||
|
|
||||||
|
eot_tokens:
|
||||||
|
- "<|im_end|>"
|
||||||
|
datasets:
|
||||||
|
- path: mlabonne/FineTome-100k
|
||||||
|
type: chat_template
|
||||||
|
split: train[:20%]
|
||||||
|
field_messages: conversations
|
||||||
|
message_field_role: from
|
||||||
|
message_field_content: value
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0.05
|
||||||
|
output_dir: ./outputs/out
|
||||||
|
|
||||||
|
sequence_len: 4096
|
||||||
|
sample_packing: true
|
||||||
|
|
||||||
|
adapter: lora
|
||||||
|
lora_model_dir:
|
||||||
|
|
||||||
|
lora_r: 32
|
||||||
|
lora_alpha: 16
|
||||||
|
lora_dropout: 0.05
|
||||||
|
lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
|
||||||
|
|
||||||
|
wandb_project:
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_name:
|
||||||
|
wandb_log_model:
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 2
|
||||||
|
micro_batch_size: 4
|
||||||
|
num_epochs: 1
|
||||||
|
optimizer: adamw_torch_fused
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 5e-5
|
||||||
|
|
||||||
|
bf16: true
|
||||||
|
tf32: true
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
resume_from_checkpoint:
|
||||||
|
logging_steps: 1
|
||||||
|
flash_attention: true
|
||||||
|
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
evals_per_epoch: 2
|
||||||
|
saves_per_epoch: 1
|
||||||
|
|
||||||
|
weight_decay: 0.0
|
||||||
|
|
||||||
|
# save_first_step: true # uncomment this to validate checkpoint saving works with your config
|
||||||
@@ -3,6 +3,9 @@ trust_remote_code: true
|
|||||||
model_type: AutoModelForImageTextToText
|
model_type: AutoModelForImageTextToText
|
||||||
processor_type: AutoProcessor
|
processor_type: AutoProcessor
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
# these 3 lines are needed for now to handle vision chat templates with images
|
# these 3 lines are needed for now to handle vision chat templates with images
|
||||||
skip_prepare_dataset: true
|
skip_prepare_dataset: true
|
||||||
remove_unused_columns: false
|
remove_unused_columns: false
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
uv sync
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv pip install flash-attn --no-build-isolation
|
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
@@ -31,7 +31,7 @@ python scripts/cutcrossentropy_install.py | sh
|
|||||||
# For those using our Docker image, use the below path.
|
# For those using our Docker image, use the below path.
|
||||||
export CUDA_HOME=/usr/local/cuda
|
export CUDA_HOME=/usr/local/cuda
|
||||||
|
|
||||||
uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
|
For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
|
||||||
@@ -67,7 +67,7 @@ If those didn't help, please try the below solutions:
|
|||||||
1. Pass the env var for CMake and try the install again:
|
1. Pass the env var for CMake and try the install again:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
Python_EXECUTABLE=$(which python) uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
Python_EXECUTABLE=$(which python) pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Git clone the repo and manually hardcode the Python path:
|
2. Git clone the repo and manually hardcode the Python path:
|
||||||
@@ -92,7 +92,7 @@ If those didn't help, please try the below solutions:
|
|||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install . --no-build-isolation --no-deps
|
pip3 install . --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
## Optimization Guides
|
## Optimization Guides
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
uv sync
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv pip install flash-attn --no-build-isolation
|
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -12,10 +12,10 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
|
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- \u2b50 us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
"- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
||||||
"- \ud83d\udcdc Read the [Docs](http://docs.axolotl.ai/)\n",
|
"- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
|
||||||
"- \ud83d\udcac Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
"- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
||||||
"- \ud83d\udcf0 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
"- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -39,8 +39,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"%%capture\n",
|
"%%capture\n",
|
||||||
"# This step can take ~5-10 minutes to install dependencies\n",
|
"# This step can take ~5-10 minutes to install dependencies\n",
|
||||||
"!uv pip install --no-build-isolation axolotl>=0.9.1\n!uv pip install flash-attn --no-build-isolation\n",
|
"!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
|
||||||
"!uv pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1371,7 +1371,7 @@
|
|||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv\u2026"
|
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -1729,9 +1729,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
|
"layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
|
"style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
|
||||||
"value": "Drop\u2007Samples\u2007with\u2007Zero\u2007Trainable\u2007Tokens\u2007(num_proc=2):\u2007100%"
|
"value": "Drop Samples with Zero Trainable Tokens (num_proc=2): 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"083f9cda8d754c168beee10d2f8955a2": {
|
"083f9cda8d754c168beee10d2f8955a2": {
|
||||||
@@ -1774,9 +1774,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
|
"layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
|
"style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
|
||||||
"value": "\u200711.4M/11.4M\u2007[00:00<00:00,\u200721.8MB/s]"
|
"value": " 11.4M/11.4M [00:00<00:00, 21.8MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"0a46ad75c198463d843fb35e813642cb": {
|
"0a46ad75c198463d843fb35e813642cb": {
|
||||||
@@ -1917,7 +1917,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
|
"layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
|
"style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
|
||||||
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
||||||
}
|
}
|
||||||
@@ -1938,9 +1938,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
|
"layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
|
"style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
|
||||||
"value": "\u20073.84G/3.84G\u2007[00:09<00:00,\u2007664MB/s]"
|
"value": " 3.84G/3.84G [00:09<00:00, 664MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"0e936d9dbf9c4fdd86bbfe9730dedc47": {
|
"0e936d9dbf9c4fdd86bbfe9730dedc47": {
|
||||||
@@ -2296,9 +2296,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
|
"layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
|
"style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
|
||||||
"value": "\u20079985/9985\u2007[00:04<00:00,\u20072604.11\u2007examples/s]"
|
"value": " 9985/9985 [00:04<00:00, 2604.11 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"16d1283741404b7bb319094c992fce01": {
|
"16d1283741404b7bb319094c992fce01": {
|
||||||
@@ -2317,9 +2317,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
|
"layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
|
"style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
|
||||||
"value": "\u20079985/0\u2007[00:00<00:00,\u200750763.46\u2007examples/s]"
|
"value": " 9985/0 [00:00<00:00, 50763.46 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"1811cda0644e4190a9469d1774435d82": {
|
"1811cda0644e4190a9469d1774435d82": {
|
||||||
@@ -2390,9 +2390,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
|
"layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
|
"style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
|
||||||
"value": "model-00008-of-00008.safetensors:\u2007100%"
|
"value": "model-00008-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"19127c7bb1554ccbac877059f9a82db0": {
|
"19127c7bb1554ccbac877059f9a82db0": {
|
||||||
@@ -2561,9 +2561,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
|
"layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
|
"style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
|
||||||
"value": "\u20079.68k/9.68k\u2007[00:00<00:00,\u2007812kB/s]"
|
"value": " 9.68k/9.68k [00:00<00:00, 812kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"1f7d30f71bbd4547a9150d21da071055": {
|
"1f7d30f71bbd4547a9150d21da071055": {
|
||||||
@@ -2634,9 +2634,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
|
"layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
|
"style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
|
||||||
"value": "model-00002-of-00008.safetensors:\u2007100%"
|
"value": "model-00002-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"20352e5f58d24bb8b1f3940efd14fe4a": {
|
"20352e5f58d24bb8b1f3940efd14fe4a": {
|
||||||
@@ -2707,9 +2707,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
|
"layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_e6e969610738449887259063967f82b0",
|
"style": "IPY_MODEL_e6e969610738449887259063967f82b0",
|
||||||
"value": "\u20072.78M/2.78M\u2007[00:00<00:00,\u200717.8MB/s]"
|
"value": " 2.78M/2.78M [00:00<00:00, 17.8MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"258b7c635c1045329d4669e48c46ccd5": {
|
"258b7c635c1045329d4669e48c46ccd5": {
|
||||||
@@ -3056,9 +3056,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
|
"layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
|
"style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
|
||||||
"value": "model-00005-of-00008.safetensors:\u2007100%"
|
"value": "model-00005-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"3036608c71904ce9ae4bb2a9fa8802d9": {
|
"3036608c71904ce9ae4bb2a9fa8802d9": {
|
||||||
@@ -3077,9 +3077,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
|
"layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
|
"style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:10<00:00,\u2007531MB/s]"
|
"value": " 3.96G/3.96G [00:10<00:00, 531MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"30a81da86f8043eca301e86a8651201a": {
|
"30a81da86f8043eca301e86a8651201a": {
|
||||||
@@ -3629,9 +3629,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
|
"layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
|
"style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
|
||||||
"value": "Loading\u2007checkpoint\u2007shards:\u2007100%"
|
"value": "Loading checkpoint shards: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"41f3b32c2f6b4034ae7a3b9124e28bc7": {
|
"41f3b32c2f6b4034ae7a3b9124e28bc7": {
|
||||||
@@ -3791,7 +3791,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
|
"layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
|
"style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
|
||||||
"value": "Connecting..."
|
"value": "Connecting..."
|
||||||
}
|
}
|
||||||
@@ -4077,9 +4077,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
|
"layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
|
"style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
|
||||||
"value": "Dropping\u2007Long\u2007Sequences\u2007(num_proc=2):\u2007100%"
|
"value": "Dropping Long Sequences (num_proc=2): 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"5ca240f31e6b44e3882c5eb37cd5a309": {
|
"5ca240f31e6b44e3882c5eb37cd5a309": {
|
||||||
@@ -4471,9 +4471,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
|
"layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
|
"style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
|
||||||
"value": "\u2007728/728\u2007[00:00<00:00,\u200720.3kB/s]"
|
"value": " 728/728 [00:00<00:00, 20.3kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"62e302ebdad64aada0ffe64ae1c873f3": {
|
"62e302ebdad64aada0ffe64ae1c873f3": {
|
||||||
@@ -4636,9 +4636,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
|
"layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
|
"style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
|
||||||
"value": "\u20079985/9985\u2007[00:02<00:00,\u20073977.47\u2007examples/s]"
|
"value": " 9985/9985 [00:02<00:00, 3977.47 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"67da6c4260574869aa24c3cbc1bc1654": {
|
"67da6c4260574869aa24c3cbc1bc1654": {
|
||||||
@@ -4778,7 +4778,7 @@
|
|||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"disabled": false,
|
"disabled": false,
|
||||||
"layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
|
"layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
|
"style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
|
||||||
"value": ""
|
"value": ""
|
||||||
}
|
}
|
||||||
@@ -4823,9 +4823,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
|
"layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
|
"style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
|
||||||
"value": "Tokenizing\u2007Prompts\u2007(num_proc=2):\u2007100%"
|
"value": "Tokenizing Prompts (num_proc=2): 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"704f2f5a9b1c49d5a75a0025a5dda11b": {
|
"704f2f5a9b1c49d5a75a0025a5dda11b": {
|
||||||
@@ -5071,9 +5071,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
|
"layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
|
"style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
|
||||||
"value": "train.jsonl:\u2007100%"
|
"value": "train.jsonl: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"7be6f04c284e4326bb4ff3d301e7b3c6": {
|
"7be6f04c284e4326bb4ff3d301e7b3c6": {
|
||||||
@@ -5138,9 +5138,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
|
"layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
|
"style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
|
||||||
"value": "config.json:\u2007100%"
|
"value": "config.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"7cd0b85ebd204b7aba908417811ce4e0": {
|
"7cd0b85ebd204b7aba908417811ce4e0": {
|
||||||
@@ -5339,9 +5339,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
|
"layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
|
"style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
|
||||||
"value": "\u20078/8\u2007[01:47<00:00,\u200711.64s/it]"
|
"value": " 8/8 [01:47<00:00, 11.64s/it]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"823f1c78f15043e38bbd4dca3932a86a": {
|
"823f1c78f15043e38bbd4dca3932a86a": {
|
||||||
@@ -5488,7 +5488,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
|
"layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
|
"style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
|
||||||
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
||||||
}
|
}
|
||||||
@@ -5509,9 +5509,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
|
"layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
|
"style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
|
||||||
"value": "\u20071.67M/1.67M\u2007[00:00<00:00,\u200719.0MB/s]"
|
"value": " 1.67M/1.67M [00:00<00:00, 19.0MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"897b77a56c09479bb11d7f2a30997e55": {
|
"897b77a56c09479bb11d7f2a30997e55": {
|
||||||
@@ -5717,9 +5717,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
|
"layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
|
"style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007273MB/s]"
|
"value": " 3.96G/3.96G [00:13<00:00, 273MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"936d04b5fe1b4c63bf0b080e423d051b": {
|
"936d04b5fe1b4c63bf0b080e423d051b": {
|
||||||
@@ -6050,9 +6050,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
|
"layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
|
"style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
|
||||||
"value": "merges.txt:\u2007100%"
|
"value": "merges.txt: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"9cd5211b5d8b457aa0002f1d17b80028": {
|
"9cd5211b5d8b457aa0002f1d17b80028": {
|
||||||
@@ -6071,9 +6071,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
|
"layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
|
"style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
|
||||||
"value": "model-00007-of-00008.safetensors:\u2007100%"
|
"value": "model-00007-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"9d4897eefb5f48259ffb2d23e332f752": {
|
"9d4897eefb5f48259ffb2d23e332f752": {
|
||||||
@@ -6303,9 +6303,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
|
"layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
|
"style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
|
||||||
"value": "\u2007239/239\u2007[00:00<00:00,\u200730.9kB/s]"
|
"value": " 239/239 [00:00<00:00, 30.9kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a20927bf5f2c41f58c1e31ac858ab36c": {
|
"a20927bf5f2c41f58c1e31ac858ab36c": {
|
||||||
@@ -6324,9 +6324,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
|
"layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
|
"style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
|
||||||
"value": "tokenizer.json:\u2007100%"
|
"value": "tokenizer.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a3a945817f684328b34651fe052393ec": {
|
"a3a945817f684328b34651fe052393ec": {
|
||||||
@@ -6360,9 +6360,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
|
"layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
|
"style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
|
||||||
"value": "model-00001-of-00008.safetensors:\u2007100%"
|
"value": "model-00001-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a4e5789584564049b83df7c6c54a3e08": {
|
"a4e5789584564049b83df7c6c54a3e08": {
|
||||||
@@ -6494,9 +6494,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
|
"layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
|
"style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
|
||||||
"value": "model.safetensors.index.json:\u2007100%"
|
"value": "model.safetensors.index.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ab93eabd7cea4b94b4b7a387f101e8a1": {
|
"ab93eabd7cea4b94b4b7a387f101e8a1": {
|
||||||
@@ -6582,9 +6582,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
|
"layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
|
"style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
|
||||||
"value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%"
|
"value": "Saving the dataset (1/1 shards): 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ad7599de524549c48bf2d3124ad4b299": {
|
"ad7599de524549c48bf2d3124ad4b299": {
|
||||||
@@ -6967,9 +6967,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
|
"layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
|
"style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
|
||||||
"value": "Generating\u2007train\u2007split:\u2007"
|
"value": "Generating train split: "
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"b87c84de30e84b3abf4871461fb9cbd3": {
|
"b87c84de30e84b3abf4871461fb9cbd3": {
|
||||||
@@ -7085,9 +7085,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
|
"layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
|
"style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
|
||||||
"value": "\u20071.91G/1.91G\u2007[00:05<00:00,\u2007444MB/s]"
|
"value": " 1.91G/1.91G [00:05<00:00, 444MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"bd1b0dfed6d34d16af33a4a58330f5ec": {
|
"bd1b0dfed6d34d16af33a4a58330f5ec": {
|
||||||
@@ -7325,9 +7325,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
|
"layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
|
"style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:15<00:00,\u2007564MB/s]"
|
"value": " 3.96G/3.96G [00:15<00:00, 564MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c0991cf63ee6458b96e9a75e7a88b61a": {
|
"c0991cf63ee6458b96e9a75e7a88b61a": {
|
||||||
@@ -7346,9 +7346,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
|
"layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
|
"style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
|
||||||
"value": "tokenizer_config.json:\u2007100%"
|
"value": "tokenizer_config.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c12ea43372ac4d57bb9605f1a429b397": {
|
"c12ea43372ac4d57bb9605f1a429b397": {
|
||||||
@@ -7581,9 +7581,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
|
"layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
|
"style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
|
||||||
"value": "model-00003-of-00008.safetensors:\u2007100%"
|
"value": "model-00003-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c6164e05a1914ae48083db9ad7f4ef7c": {
|
"c6164e05a1914ae48083db9ad7f4ef7c": {
|
||||||
@@ -7694,9 +7694,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
|
"layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
|
"style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
|
||||||
"value": "\u20079985/9985\u2007[01:04<00:00,\u2007189.08\u2007examples/s]"
|
"value": " 9985/9985 [01:04<00:00, 189.08 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c7433acd3c4841e6958ae8f7e87b1808": {
|
"c7433acd3c4841e6958ae8f7e87b1808": {
|
||||||
@@ -7737,9 +7737,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
|
"layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
|
"style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
|
||||||
"value": "Add\u2007position_id\u2007column\u2007(Sample\u2007Packing)\u2007(num_proc=2):\u2007100%"
|
"value": "Add position_id column (Sample Packing) (num_proc=2): 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ca65e32eb52f48c09a84b33cb18f22cd": {
|
"ca65e32eb52f48c09a84b33cb18f22cd": {
|
||||||
@@ -8162,9 +8162,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
|
"layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
|
"style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
|
||||||
"value": "\u200727.3M/27.3M\u2007[00:00<00:00,\u200731.0MB/s]"
|
"value": " 27.3M/27.3M [00:00<00:00, 31.0MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"d43c6df07ddb466587807d6dbe1ff614": {
|
"d43c6df07ddb466587807d6dbe1ff614": {
|
||||||
@@ -8183,9 +8183,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
|
"layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
|
"style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
|
||||||
"value": "model-00004-of-00008.safetensors:\u2007100%"
|
"value": "model-00004-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"d65b6b060d9845779299491ac5599c31": {
|
"d65b6b060d9845779299491ac5599c31": {
|
||||||
@@ -8474,9 +8474,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
|
"layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
|
"style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
|
||||||
"value": "vocab.json:\u2007100%"
|
"value": "vocab.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"dfd2a2649b8341ef913207526708aff1": {
|
"dfd2a2649b8341ef913207526708aff1": {
|
||||||
@@ -8669,9 +8669,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
|
"layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
|
"style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
|
||||||
"value": "\u20079985/9985\u2007[00:03<00:00,\u20073622.89\u2007examples/s]"
|
"value": " 9985/9985 [00:03<00:00, 3622.89 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"e400cbf14bcc446a9d33b210cd93550b": {
|
"e400cbf14bcc446a9d33b210cd93550b": {
|
||||||
@@ -9065,9 +9065,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
|
"layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
|
"style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007398MB/s]"
|
"value": " 3.96G/3.96G [00:13<00:00, 398MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ec030fc3c346426f9abc3a89892258d3": {
|
"ec030fc3c346426f9abc3a89892258d3": {
|
||||||
@@ -9110,9 +9110,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
|
"layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
|
"style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
|
||||||
"value": "\u200736.5k/36.5k\u2007[00:00<00:00,\u20074.32MB/s]"
|
"value": " 36.5k/36.5k [00:00<00:00, 4.32MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ed28e2e0410d4e0b855467e798e53d66": {
|
"ed28e2e0410d4e0b855467e798e53d66": {
|
||||||
@@ -9422,9 +9422,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
|
"layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
|
"style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
|
||||||
"value": "generation_config.json:\u2007100%"
|
"value": "generation_config.json: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"f4667818b9d34a09891cd727a429a610": {
|
"f4667818b9d34a09891cd727a429a610": {
|
||||||
@@ -9443,9 +9443,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
|
"layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
|
"style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:11<00:00,\u2007457MB/s]"
|
"value": " 3.96G/3.96G [00:11<00:00, 457MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"f4a1795dc7514a718f478245f521f0ba": {
|
"f4a1795dc7514a718f478245f521f0ba": {
|
||||||
@@ -9830,9 +9830,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
|
"layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
|
"style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
|
||||||
"value": "model-00006-of-00008.safetensors:\u2007100%"
|
"value": "model-00006-of-00008.safetensors: 100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fe18bba7f3fb4c31bf840541f36b3425": {
|
"fe18bba7f3fb4c31bf840541f36b3425": {
|
||||||
@@ -9873,9 +9873,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
|
"layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
|
"style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
|
||||||
"value": "\u20079985/9985\u2007[00:00<00:00,\u200744264.88\u2007examples/s]"
|
"value": " 9985/9985 [00:00<00:00, 44264.88 examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fea1b70fb46745feb5111b3929175b5d": {
|
"fea1b70fb46745feb5111b3929175b5d": {
|
||||||
@@ -9931,9 +9931,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
|
"layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
|
||||||
"placeholder": "\u200b",
|
"placeholder": "",
|
||||||
"style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
|
"style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
|
||||||
"value": "\u20073.96G/3.96G\u2007[00:12<00:00,\u2007656MB/s]"
|
"value": " 3.96G/3.96G [00:12<00:00, 656MB/s]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,13 +16,8 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
base_model: google/gemma-3-1b-it
|
base_model: google/gemma-3-1b-it
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: Gemma3ForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
# Automatically upload checkpoint and final model to HF
|
||||||
# hub_model_id: username/custom_model_name
|
# hub_model_id: username/custom_model_name
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
base_model: google/gemma-3-270m-it
|
base_model: google/gemma-3-270m-it
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: Gemma3ForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
# Automatically upload checkpoint and final model to HF
|
||||||
# hub_model_id: username/custom_model_name
|
# hub_model_id: username/custom_model_name
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
base_model: google/gemma-3-4b-it
|
base_model: google/gemma-3-4b-it
|
||||||
|
|
||||||
|
# Need to set this, else transformers tries to load the vision tower too
|
||||||
|
model_type: Gemma3ForCausalLM
|
||||||
|
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|
||||||
# gemma3 doesn't seem to play nice with ddp
|
# gemma3 doesn't seem to play nice with ddp
|
||||||
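For context on the `model_type: Gemma3ForCausalLM` changes in the Gemma 3 configs above: a minimal sketch of the difference (assuming a recent transformers release where `Gemma3ForCausalLM` can load the text sub-model from the multimodal checkpoint; this is not the exact code Axolotl runs):

```python
from transformers import Gemma3ForCausalLM

# AutoModelForCausalLM resolves google/gemma-3-4b-it to the full multimodal
# model, which also instantiates the vision tower; the explicit class below
# loads only the language-model weights.
model = Gemma3ForCausalLM.from_pretrained("google/gemma-3-4b-it")
```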
|
|||||||
@@ -10,22 +10,17 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. In addition to Axolotl's requirements, Gemma-3n requires:
|
2. In addition to Axolotl's requirements, Gemma-3n requires:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install timm==1.0.17
|
pip3 install timm==1.0.17
|
||||||
|
|
||||||
# for loading audio data
|
# for loading audio data
|
||||||
uv pip install librosa==0.11.0
|
pip3 install librosa==0.11.0
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Download sample dataset files
|
3. Download sample dataset files
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
[GPT-OSS](https://huggingface.co/collections/openai/gpt-oss-68911959590a1634ba11c7a4) is a family of open-weight MoE models trained by OpenAI, released in August 2025. There are two variants: 20B and 120B.
|
[GPT-OSS](https://huggingface.co/collections/openai/gpt-oss-68911959590a1634ba11c7a4) is a family of open-weight MoE models trained by OpenAI, released in August 2025. There are two variants: 20B and 120B.
|
||||||
|
|
||||||
|
In October 2025, OpenAI released safeguard models built upon GPT-OSS, called [GPT-OSS-Safeguard](https://huggingface.co/collections/openai/gpt-oss-safeguard). They use the same architecture, so the same examples below can be re-used.
|
||||||
|
|
||||||
This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
|
This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
|
||||||
|
|
||||||
## Getting started
|
## Getting started
|
||||||
@@ -12,13 +14,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
|
2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
|
||||||
@@ -69,6 +66,16 @@ axolotl merge-sharded-fsdp-weights examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offlo
|
|||||||
mv ./outputs/gpt-oss-out/merged/* ./outputs/gpt-oss-out/
|
mv ./outputs/gpt-oss-out/merged/* ./outputs/gpt-oss-out/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### How to set `reasoning_effort` in the template
|
||||||
|
|
||||||
|
The harmony template has a feature to set the `reasoning_effort` during prompt building. The default is `medium`. If you would like to adjust this, you can add the following to your config:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
chat_template_kwargs:
|
||||||
|
reasoning_effort: "high" # low | medium | high
|
||||||
|
```
|
||||||
|
|
||||||
|
Currently, this applies globally; there is no way to set it per sample yet. If you are interested in adding this, feel free to open an Issue to discuss. (A quick way to preview the effect is sketched below.)
|
||||||
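Outside of a training run, you can preview what the template produces for a given effort level. A minimal sketch (assuming the model's harmony chat template reads extra kwargs such as `reasoning_effort`, which transformers forwards from `apply_chat_template`):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")
messages = [{"role": "user", "content": "Hello!"}]

# Extra keyword arguments are exposed to the Jinja chat template, which is
# how the harmony template receives reasoning_effort at render time.
prompt = tok.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    reasoning_effort="high",
)
print(prompt)
```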
|
|
||||||
### Inferencing your fine-tuned model
|
### Inferencing your fine-tuned model
|
||||||
|
|
||||||
@@ -80,7 +87,7 @@ for more information about using a special vllm-openai docker image for inferenc
|
|||||||
Optionally, vLLM can be installed from nightly:
|
Optionally, vLLM can be installed from nightly:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
```
|
```
|
||||||
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
|
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -0,0 +1,67 @@
|
|||||||
|
base_model: openai/gpt-oss-safeguard-20b
|
||||||
|
use_kernels: true
|
||||||
|
model_quantization_config: Mxfp4Config
|
||||||
|
model_quantization_config_kwargs:
|
||||||
|
dequantize: true
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
|
experimental_skip_move_to_device: true # prevent OOM by not putting model to GPU before sharding
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: HuggingFaceH4/Multilingual-Thinking
|
||||||
|
type: chat_template
|
||||||
|
field_thinking: thinking
|
||||||
|
template_thinking_key: thinking
|
||||||
|
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0
|
||||||
|
output_dir: ./outputs/gpt-oss-safeguard-out/
|
||||||
|
|
||||||
|
sequence_len: 4096
|
||||||
|
sample_packing: true
|
||||||
|
|
||||||
|
adapter: lora
|
||||||
|
lora_r: 8
|
||||||
|
lora_alpha: 16
|
||||||
|
lora_dropout: 0.0 # dropout not supported when using LoRA over expert parameters
|
||||||
|
lora_target_linear: true
|
||||||
|
|
||||||
|
# TODO: not supported for now, see peft#2710
|
||||||
|
#lora_target_parameters: # target the experts in the last two layers
|
||||||
|
# - "22._checkpoint_wrapped_module.mlp.experts.gate_up_proj"
|
||||||
|
# - "22._checkpoint_wrapped_module.mlp.experts.down_proj"
|
||||||
|
# - "23._checkpoint_wrapped_module.mlp.experts.gate_up_proj"
|
||||||
|
# - "23._checkpoint_wrapped_module.mlp.experts.down_proj"
|
||||||
|
|
||||||
|
wandb_project:
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_name:
|
||||||
|
wandb_log_model:
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 8
|
||||||
|
micro_batch_size: 1
|
||||||
|
num_epochs: 1
|
||||||
|
|
||||||
|
optimizer: adamw_torch_8bit
|
||||||
|
lr_scheduler: constant_with_warmup
|
||||||
|
learning_rate: 2e-4
|
||||||
|
|
||||||
|
bf16: true
|
||||||
|
tf32: true
|
||||||
|
|
||||||
|
flash_attention: true
|
||||||
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
activation_offloading: true
|
||||||
|
|
||||||
|
logging_steps: 1
|
||||||
|
saves_per_epoch: 1
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
|
||||||
|
special_tokens:
|
||||||
|
eot_tokens:
|
||||||
|
- "<|end|>"
|
||||||
65
examples/granite4/README.md
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# Finetune IBM's Granite 4.0 with Axolotl
|
||||||
|
|
||||||
|
[Granite 4.0](https://huggingface.co/collections/ibm-granite/granite-40-language-models) is a family of open-source models trained by IBM Research.
|
||||||
|
|
||||||
|
This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from `main`, since Granite 4 is only supported on nightly, or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).
|
||||||
|
|
||||||
|
Here is an example of how to install from main for pip:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Ensure you have Pytorch installed (Pytorch 2.7.1 min)
|
||||||
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
|
cd axolotl
|
||||||
|
|
||||||
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
|
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||||
|
|
||||||
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run the finetuning example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
axolotl train examples/granite4/granite-4.0-tiny-fft.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
This config uses about 40.8 GiB VRAM.
|
||||||
|
|
||||||
|
Let us know how it goes. Happy finetuning! 🚀
|
||||||
|
|
||||||
|
### TIPS
|
||||||
|
|
||||||
|
- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||||
|
- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
|
||||||
|
|
||||||
|
### Limitations
|
||||||
|
|
||||||
|
Adapter finetuning does not work at the moment; it errors with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
RuntimeError: mat1 and mat2 shapes cannot be multiplied (4096x3072 and 1x1179648)
|
||||||
|
```
|
||||||
|
|
||||||
|
In addition, even once adapter training works, `lora_target_linear: true` will still fail with:
|
||||||
|
```bash
|
||||||
|
ValueError: Target module GraniteMoeHybridParallelExperts() is not supported.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Optimization Guides
|
||||||
|
|
||||||
|
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
||||||
|
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
||||||
|
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
||||||
|
|
||||||
|
## Related Resources
|
||||||
|
|
||||||
|
- [Granite Docs](https://www.ibm.com/granite/docs/models/granite)
|
||||||
|
- [Axolotl Docs](https://docs.axolotl.ai)
|
||||||
|
- [Axolotl Website](https://axolotl.ai)
|
||||||
|
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
|
||||||
|
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
|
||||||
45
examples/granite4/granite-4.0-tiny-fft.yaml
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
base_model: ibm-granite/granite-4.0-tiny-preview
|
||||||
|
|
||||||
|
# Automatically upload checkpoint and final model to HF
|
||||||
|
# hub_model_id: username/custom_model_name
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: fozziethebeat/alpaca_messages_2k_test
|
||||||
|
type: chat_template
|
||||||
|
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0.1
|
||||||
|
output_dir: ./outputs/model-out
|
||||||
|
|
||||||
|
sequence_len: 2048
|
||||||
|
sample_packing: true
|
||||||
|
|
||||||
|
wandb_project:
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_name:
|
||||||
|
wandb_log_model:
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 4
|
||||||
|
micro_batch_size: 2
|
||||||
|
num_epochs: 1
|
||||||
|
optimizer: adamw_bnb_8bit
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 0.0002
|
||||||
|
|
||||||
|
bf16: auto
|
||||||
|
tf32: false
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
resume_from_checkpoint:
|
||||||
|
logging_steps: 1
|
||||||
|
flash_attention: true
|
||||||
|
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
evals_per_epoch: 1
|
||||||
|
saves_per_epoch: 1
|
||||||
|
|
||||||
|
# save_first_step: true # uncomment this to validate checkpoint saving works with your config
|
||||||
@@ -13,8 +13,8 @@ Tencent released a family of opensource models called HunYuan with varying param
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
uv sync
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv pip install flash-attn --no-build-isolation
|
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ flex_attention: true
|
|||||||
flex_attn_compile_kwargs:
|
flex_attn_compile_kwargs:
|
||||||
dynamic: false
|
dynamic: false
|
||||||
mode: max-autotune-no-cudagraphs
|
mode: max-autotune-no-cudagraphs
|
||||||
|
save_strategy: no
|
||||||
torch_compile: true
|
torch_compile: true
|
||||||
|
|
||||||
wandb_project:
|
wandb_project:
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ eval_sample_packing: true
|
|||||||
gradient_accumulation_steps: 4
|
gradient_accumulation_steps: 4
|
||||||
micro_batch_size: 4
|
micro_batch_size: 4
|
||||||
num_epochs: 1
|
num_epochs: 1
|
||||||
warmup_steps: 0.1
|
warmup_ratio: 0.1
|
||||||
|
|
||||||
optimizer: adamw_8bit
|
optimizer: adamw_8bit
|
||||||
lr_scheduler: cosine
|
lr_scheduler: cosine
|
||||||
@@ -44,7 +44,7 @@ resume_from_checkpoint:
|
|||||||
sdp_attention: true
|
sdp_attention: true
|
||||||
|
|
||||||
logging_steps: 1
|
logging_steps: 1
|
||||||
save_strategy: best
|
save_strategy: epoch
|
||||||
eval_strategy: epoch
|
eval_strategy: epoch
|
||||||
|
|
||||||
special_tokens:
|
special_tokens:
|
||||||
|
|||||||
50
examples/llama-3/opentelemetry-qlora.yml
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
base_model: NousResearch/Llama-3.2-1B
|
||||||
|
model_type: AutoModelForCausalLM
|
||||||
|
tokenizer_type: AutoTokenizer
|
||||||
|
|
||||||
|
load_in_4bit: true
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: mhenrichsen/alpaca_2k_test
|
||||||
|
type: alpaca
|
||||||
|
|
||||||
|
output_dir: ./outputs/opentelemetry-example
|
||||||
|
|
||||||
|
adapter: qlora
|
||||||
|
sequence_len: 512
|
||||||
|
sample_packing: false
|
||||||
|
|
||||||
|
lora_r: 32
|
||||||
|
lora_alpha: 16
|
||||||
|
lora_dropout: 0.05
|
||||||
|
lora_target_linear: true
|
||||||
|
|
||||||
|
# OpenTelemetry Configuration
|
||||||
|
use_otel_metrics: true
|
||||||
|
otel_metrics_host: "localhost"
|
||||||
|
otel_metrics_port: 8000
|
||||||
|
|
||||||
|
# Disable WandB
|
||||||
|
use_wandb: false
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 4
|
||||||
|
micro_batch_size: 2
|
||||||
|
num_epochs: 1
|
||||||
|
optimizer: paged_adamw_32bit
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 0.0002
|
||||||
|
|
||||||
|
bf16: auto
|
||||||
|
tf32: false
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
logging_steps: 1
|
||||||
|
flash_attention: false
|
||||||
|
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
evals_per_epoch: 2
|
||||||
|
saves_per_epoch: 1
|
||||||
|
weight_decay: 0.0
|
||||||
|
|
||||||
|
special_tokens:
|
||||||
|
pad_token: "<|end_of_text|>"
|
||||||
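With `use_otel_metrics` enabled as in the config above, you can sanity-check the exporter while training runs. A sketch, assuming the Prometheus exporter serves plain-text metrics on the configured host and port at the conventional `/metrics` path:

```bash
# Scrape the metrics endpoint exposed by the trainer
curl -s http://localhost:8000/metrics | head -n 20
```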
@@ -13,14 +13,9 @@ Thanks to the team at MistralAI for giving us early access to prepare for these
|
|||||||
Here is an example of how to install from pip:
|
Here is an example of how to install from pip:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ Before starting, ensure you have:
|
|||||||
Run the thinking model fine-tuning:
|
Run the thinking model fine-tuning:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
axolotl train magistral-small-think-qlora.yaml
|
axolotl train examples/magistral/think/magistral-small-think-qlora.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
This config uses about 19.1 GiB VRAM.
|
This config uses about 19.1 GiB VRAM.
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ Before starting, ensure you have:
|
|||||||
|
|
||||||
3. Run the fine-tuning:
|
3. Run the fine-tuning:
|
||||||
```bash
|
```bash
|
||||||
axolotl train magistral-small-vision-24B-qlora.yml
|
axolotl train examples/magistral/vision/magistral-small-vision-24B-qlora.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
This config uses about 17GiB VRAM.
|
This config uses about 17GiB VRAM.
|
||||||
|
|||||||
51
examples/mistral/mistral-small/README.md
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# Mistral Small 3.1/3.2 Fine-tuning
|
||||||
|
|
||||||
|
This guide covers fine-tuning [Mistral Small 3.1](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503) and [Mistral Small 3.2](https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506) with vision capabilities using Axolotl.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Before starting, ensure you have:
|
||||||
|
- Installed Axolotl (see [Installation docs](https://docs.axolotl.ai/docs/installation.html))
|
||||||
|
|
||||||
|
## Getting Started
|
||||||
|
|
||||||
|
1. Install the required vision lib:
|
||||||
|
```bash
|
||||||
|
pip install 'mistral-common[opencv]==1.8.5'
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Download the example dataset image:
|
||||||
|
```bash
|
||||||
|
wget https://huggingface.co/datasets/Nanobit/text-vision-2k-test/resolve/main/African_elephant.jpg
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Run the fine-tuning:
|
||||||
|
```bash
|
||||||
|
axolotl train examples/mistral/mistral-small/mistral-small-3.1-24B-lora.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
This config uses about 29.4 GiB VRAM.
|
||||||
|
|
||||||
|
## Dataset Format
|
||||||
|
|
||||||
|
The vision model requires the multi-modal dataset format documented [here](https://docs.axolotl.ai/docs/multimodal.html#dataset-format).
|
||||||
|
|
||||||
|
One exception: passing `"image": PIL.Image` is not supported. MistralTokenizer only supports `path`, `url`, and `base64` for now; a base64 workaround is sketched after the example below.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": [{ "type": "text", "text": "{SYSTEM_PROMPT}"}]},
|
||||||
|
{"role": "user", "content": [
|
||||||
|
{ "type": "text", "text": "What's in this image?"},
|
||||||
|
{"type": "image", "path": "path/to/image.jpg" }
|
||||||
|
]},
|
||||||
|
{"role": "assistant", "content": [{ "type": "text", "text": "..." }]},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
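Since `PIL.Image` objects are not accepted, one workaround is to inline the image as base64. A minimal sketch (the helper name is illustrative; the `base64` key follows the supported-keys list above):

```python
import base64

def image_to_base64(path: str) -> str:
    """Read an image file and return its base64-encoded contents."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

# Drop-in replacement for the "image" entry in the messages example above
image_entry = {"type": "image", "base64": image_to_base64("African_elephant.jpg")}
```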
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- Sample packing is not currently supported for multi-modal training.
|
||||||
@@ -39,7 +39,7 @@ wandb_name:
|
|||||||
wandb_log_model:
|
wandb_log_model:
|
||||||
|
|
||||||
gradient_accumulation_steps: 1
|
gradient_accumulation_steps: 1
|
||||||
micro_batch_size: 1
|
micro_batch_size: 2
|
||||||
num_epochs: 1
|
num_epochs: 1
|
||||||
optimizer: adamw_bnb_8bit
|
optimizer: adamw_bnb_8bit
|
||||||
lr_scheduler: cosine
|
lr_scheduler: cosine
|
||||||
|
|||||||
46
examples/olmo3/README.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# Finetune Allenai's Olmo 3 with Axolotl
|
||||||
|
|
||||||
|
[Olmo 3](https://huggingface.co/collections/allenai/olmo-3) is a family of open-source 7B and 32B models trained by the Allen Institute for Artificial Intelligence.
|
||||||
|
|
||||||
|
This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
||||||
|
|
||||||
|
Here is an example of how to install from pip:
|
||||||
|
```bash
|
||||||
|
# Ensure you have a compatible version of Pytorch installed
|
||||||
|
pip3 install packaging setuptools wheel ninja
|
||||||
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
|
|
||||||
|
# Install Cut Cross Entropy
|
||||||
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run the finetuning example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
axolotl train examples/olmo3/olmo3-7b-qlora.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Let us know how it goes. Happy finetuning! 🚀
|
||||||
|
|
||||||
|
### TIPS
|
||||||
|
|
||||||
|
- The example config can be re-used for Olmo and Olmo 2.
|
||||||
|
- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config.
|
||||||
|
- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||||
|
- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
|
||||||
|
|
||||||
|
## Optimization Guides
|
||||||
|
|
||||||
|
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||||
|
|
||||||
|
## Related Resources
|
||||||
|
|
||||||
|
- [Olmo 3 Blog](https://allenai.org/blog/olmo3)
|
||||||
|
- [Axolotl Docs](https://docs.axolotl.ai)
|
||||||
|
- [Axolotl Website](https://axolotl.ai)
|
||||||
|
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
|
||||||
|
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
|
||||||
64
examples/olmo3/olmo3-7b-qlora.yaml
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
base_model: allenai/Olmo-3-7B-Instruct-SFT
|
||||||
|
|
||||||
|
# Automatically upload checkpoint and final model to HF
|
||||||
|
# hub_model_id: username/custom_model_name
|
||||||
|
|
||||||
|
plugins:
|
||||||
|
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||||
|
|
||||||
|
load_in_8bit: false
|
||||||
|
load_in_4bit: true
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: fozziethebeat/alpaca_messages_2k_test
|
||||||
|
type: chat_template
|
||||||
|
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0.1
|
||||||
|
output_dir: ./outputs/lora-out
|
||||||
|
|
||||||
|
adapter: qlora
|
||||||
|
lora_model_dir:
|
||||||
|
|
||||||
|
sequence_len: 2048
|
||||||
|
sample_packing: true
|
||||||
|
|
||||||
|
lora_r: 32
|
||||||
|
lora_alpha: 16
|
||||||
|
lora_dropout: 0.05
|
||||||
|
lora_target_linear: true
|
||||||
|
lora_target_modules:
|
||||||
|
- gate_proj
|
||||||
|
- down_proj
|
||||||
|
- up_proj
|
||||||
|
- q_proj
|
||||||
|
- v_proj
|
||||||
|
- k_proj
|
||||||
|
- o_proj
|
||||||
|
|
||||||
|
wandb_project:
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_name:
|
||||||
|
wandb_log_model:
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 4
|
||||||
|
micro_batch_size: 2
|
||||||
|
num_epochs: 1
|
||||||
|
optimizer: adamw_bnb_8bit
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 0.0002
|
||||||
|
|
||||||
|
bf16: auto
|
||||||
|
tf32: false
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
resume_from_checkpoint:
|
||||||
|
logging_steps: 1
|
||||||
|
flash_attention: true
|
||||||
|
|
||||||
|
warmup_ratio: 0.1
|
||||||
|
evals_per_epoch: 1
|
||||||
|
saves_per_epoch: 1
|
||||||
|
|
||||||
|
# save_first_step: true # uncomment this to validate checkpoint saving works with your config
|
||||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
uv sync
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv pip install flash-attn --no-build-isolation
|
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
@@ -24,12 +24,12 @@ python scripts/cutcrossentropy_install.py | sh
|
|||||||
|
|
||||||
2. Install Qwen3-Next transformers commit
|
2. Install Qwen3-Next transformers commit
|
||||||
```bash
|
```bash
|
||||||
uv pip uninstall -y transformers && uv pip install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
pip3 uninstall -y transformers && pip3 install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Install FLA for improved performance
|
3. Install FLA for improved performance
|
||||||
```bash
|
```bash
|
||||||
uv pip uninstall -y causal-conv1d && uv pip install flash-linear-attention==0.3.2
|
pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.3.2
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Run the finetuning example:
|
4. Run the finetuning example:
|
||||||
|
|||||||
@@ -6,21 +6,17 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
|
|
||||||
## Getting started
|
## Getting started
|
||||||
|
|
||||||
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as Seed-OSS is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).
|
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
||||||
|
|
||||||
Here is an example of how to install from main for pip:
|
Here is an example of how to install from pip:
|
||||||
|
```bash
|
||||||
|
# Ensure you have a compatible version of Pytorch installed
|
||||||
|
pip3 install packaging setuptools wheel ninja
|
||||||
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
|
|
||||||
```bash
|
# Install Cut Cross Entropy
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
```
|
||||||
cd axolotl
|
|
||||||
|
|
||||||
uv sync --extra deepspeed
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Install Cut Cross Entropy
|
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Run the finetuning example:
|
2. Run the finetuning example:
|
||||||
|
|
||||||
@@ -41,9 +37,7 @@ Let us know how it goes. Happy finetuning! 🚀
|
|||||||
|
|
||||||
## Optimization Guides
|
## Optimization Guides
|
||||||
|
|
||||||
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||||
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
|
||||||
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
|
||||||
|
|
||||||
## Related Resources
|
## Related Resources
|
||||||
|
|
||||||
|
|||||||
@@ -13,19 +13,14 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
|
|||||||
Here is an example of how to install from pip:
|
Here is an example of how to install from pip:
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have a compatible version of Pytorch installed
|
# Ensure you have a compatible version of Pytorch installed
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging setuptools wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install an extra dependency:
|
2. Install an extra dependency:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install num2words==0.5.14
|
pip3 install num2words==0.5.14
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Run the finetuning example:
|
3. Run the finetuning example:
|
||||||
@@ -42,9 +37,7 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
|
|||||||
|
|
||||||
## Optimization Guides
|
## Optimization Guides
|
||||||
|
|
||||||
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||||
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
|
||||||
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
|
||||||
|
|
||||||
## Related Resources
|
## Related Resources
|
||||||
|
|
||||||
|
|||||||
@@ -12,21 +12,16 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||||
# Option A: manage dependencies in your project
|
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||||
uv add 'axolotl>=0.12.0'
|
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
|
|
||||||
# Option B: quick install
|
|
||||||
uv pip install 'axolotl>=0.12.0'
|
|
||||||
uv pip install flash-attn --no-build-isolation
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install the following dependencies.
|
2. Install the following dependencies.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# audio
|
# audio
|
||||||
uv pip install librosa==0.11.0
|
pip3 install librosa==0.11.0
|
||||||
uv pip install 'mistral_common[audio]==1.8.3'
|
pip3 install 'mistral_common[audio]==1.8.3'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
base_model: mistralai/Voxtral-Mini-3B-2507
|
base_model: mistralai/Voxtral-Mini-3B-2507
|
||||||
processor_type: AutoProcessor
|
processor_type: VoxtralProcessor
|
||||||
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
# Automatically upload checkpoint and final model to HF
|
||||||
# hub_model_id: username/custom_model_name
|
# hub_model_id: username/custom_model_name
|
||||||
|
|||||||
201
pyproject.toml
@@ -1,131 +1,14 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8"]
|
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "axolotl"
|
name = "axolotl"
|
||||||
dynamic = ["version"]
|
dynamic = ["version", "dependencies", "optional-dependencies"]
|
||||||
description = "LLM Trainer"
|
description = "LLM Trainer"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10,<3.13"
|
requires-python = ">=3.10"
|
||||||
license = {text = "Apache-2.0"}
|
# license = "Apache-2.0"
|
||||||
authors = [
|
|
||||||
{name = "Axolotl AI"},
|
|
||||||
]
|
|
||||||
maintainers = [
|
|
||||||
{name = "Axolotl AI"},
|
|
||||||
]
|
|
||||||
classifiers = [
|
|
||||||
"Development Status :: 4 - Beta",
|
|
||||||
"License :: OSI Approved :: Apache Software License",
|
|
||||||
"Programming Language :: Python :: 3",
|
|
||||||
"Programming Language :: Python :: 3.10",
|
|
||||||
"Programming Language :: Python :: 3.11",
|
|
||||||
"Programming Language :: Python :: 3.12",
|
|
||||||
]
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
"torch>=2.6.0",
|
|
||||||
"packaging>=23.2",
|
|
||||||
"huggingface_hub>=0.33.0",
|
|
||||||
"peft==0.17.0",
|
|
||||||
"transformers==4.56.1",
|
|
||||||
"tokenizers>=0.21.1",
|
|
||||||
"accelerate==1.10.1",
|
|
||||||
"datasets==4.0.0",
|
|
||||||
"trl==0.23.0",
|
|
||||||
"hf_xet==1.1.5",
|
|
||||||
"kernels==0.9.0",
|
|
||||||
"trackio",
|
|
||||||
"optimum==1.16.2",
|
|
||||||
"hf_transfer",
|
|
||||||
"sentencepiece",
|
|
||||||
"gradio==5.41.1",
|
|
||||||
"modal==1.0.2",
|
|
||||||
"pydantic>=2.10.6",
|
|
||||||
"addict",
|
|
||||||
"fire",
|
|
||||||
"PyYAML>=6.0",
|
|
||||||
"requests",
|
|
||||||
"wandb",
|
|
||||||
"einops",
|
|
||||||
"colorama",
|
|
||||||
"numba",
|
|
||||||
"numpy>=1.24.4,<3.0",
|
|
||||||
"evaluate==0.4.1",
|
|
||||||
"scipy",
|
|
||||||
"scikit-learn>=1.7.0",
|
|
||||||
"nvidia-ml-py==12.560.30",
|
|
||||||
"art",
|
|
||||||
"tensorboard",
|
|
||||||
"python-dotenv==1.0.1",
|
|
||||||
"s3fs>=2024.5.0",
|
|
||||||
"gcsfs>=2024.5.0",
|
|
||||||
"adlfs>=2024.5.0",
|
|
||||||
"ocifs==1.3.2",
|
|
||||||
"zstandard>=0.23.0",
|
|
||||||
"fastcore",
|
|
||||||
"lm_eval==0.4.7",
|
|
||||||
"langdetect==1.0.9",
|
|
||||||
"immutabledict==4.2.0",
|
|
||||||
"antlr4-python3-runtime==4.13.2",
|
|
||||||
"schedulefree==1.4.1",
|
|
||||||
"mistral-common==1.8.5",
|
|
||||||
|
|
||||||
# Axolotl contribs
|
|
||||||
"axolotl-contribs-lgpl @ git+https://github.com/axolotl-ai-cloud/axolotl-contribs-lgpl.git@numpy",
|
|
||||||
"axolotl-contribs-mit==0.0.5",
|
|
||||||
|
|
||||||
# Platform-specific dependencies (Linux by default, excluded on macOS)
|
|
||||||
"triton>=3.0.0 ; sys_platform != 'darwin'",
|
|
||||||
"xformers>=0.0.28 ; sys_platform != 'darwin'",
|
|
||||||
"autoawq==0.2.7.post3 ; sys_platform != 'darwin'",
|
|
||||||
"liger-kernel==0.6.1 ; sys_platform != 'darwin'",
|
|
||||||
"torchao==0.13.0 ; sys_platform != 'darwin'",
|
|
||||||
"bitsandbytes==0.47.0 ; sys_platform != 'darwin'",
|
|
||||||
"deepspeed>=0.17.5 ; sys_platform != 'darwin'",
|
|
||||||
"deepspeed-kernels ; sys_platform != 'darwin'",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
|
||||||
ring-flash-attn = [
|
|
||||||
"ring-flash-attn>=0.1.7",
|
|
||||||
"yunchang==0.6.0",
|
|
||||||
]
|
|
||||||
mamba-ssm = ["mamba-ssm>=2.2.0", "causal_conv1d>=1.4.0",]
|
|
||||||
gptqmodel = ["gptqmodel>=4.0.0"]
|
|
||||||
mlflow = ["mlflow"]
|
|
||||||
galore = ["galore_torch"]
|
|
||||||
apollo = ["apollo-torch"]
|
|
||||||
optimizers = [
|
|
||||||
"galore_torch",
|
|
||||||
"apollo-torch",
|
|
||||||
"lomo-optim==0.1.1",
|
|
||||||
"torch-optimi==0.2.1",
|
|
||||||
"came_pytorch==0.1.3",
|
|
||||||
]
|
|
||||||
ray = ["ray[train]"]
|
|
||||||
vllm = ["vllm>=0.10.0"]
|
|
||||||
llmcompressor = ["llmcompressor>=0.5.1"]
|
|
||||||
fbgemm-gpu = ["fbgemm-gpu-genai>=1.2.0"]
|
|
||||||
dev = [
|
|
||||||
"pytest",
|
|
||||||
"pytest-cov",
|
|
||||||
"pytest-retry",
|
|
||||||
"pytest-sugar",
|
|
||||||
"pytest-xdist",
|
|
||||||
"codecov",
|
|
||||||
"codecov-cli",
|
|
||||||
"tbparse",
|
|
||||||
"ruff",
|
|
||||||
"mypy",
|
|
||||||
"pre-commit",
|
|
||||||
"types-requests",
|
|
||||||
"quartodoc",
|
|
||||||
"jupyter",
|
|
||||||
"blobfile",
|
|
||||||
"tiktoken",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
axolotl = "axolotl.cli.main:main"
|
axolotl = "axolotl.cli.main:main"
|
||||||
@@ -134,20 +17,15 @@ axolotl = "axolotl.cli.main:main"
|
|||||||
Homepage = "https://axolotl.ai/"
|
Homepage = "https://axolotl.ai/"
|
||||||
Documentation = "https://docs.axolotl.ai/"
|
Documentation = "https://docs.axolotl.ai/"
|
||||||
Repository = "https://github.com/axolotl-ai-cloud/axolotl.git"
|
Repository = "https://github.com/axolotl-ai-cloud/axolotl.git"
|
||||||
Issues = "https://github.com/axolotl-ai-cloud/axolotl/issues"
|
|
||||||
|
|
||||||
[tool.setuptools]
|
|
||||||
package-dir = {"" = "src"}
|
|
||||||
include-package-data = true
|
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
|
||||||
where = ["src"]
|
|
||||||
|
|
||||||
[tool.setuptools.package-data]
|
|
||||||
"*" = ["*.yaml", "*.yml", "*.json"]
|
|
||||||
|
|
||||||
[tool.setuptools_scm]
|
[tool.setuptools_scm]
|
||||||
write_to = "src/axolotl/_version.py"
|
|
||||||
|
[tool.setuptools]
|
||||||
|
py-modules = ["setuptools_axolotl_dynamic_dependencies"]
|
||||||
|
include-package-data = true
|
||||||
|
|
||||||
|
[tool.setuptools.cmdclass]
|
||||||
|
build_py = "setuptools_axolotl_dynamic_dependencies.BuildPyCommand"
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
line-length = 88
|
line-length = 88
|
||||||
@@ -179,60 +57,3 @@ indent-style = "space"
|
|||||||
skip-magic-trailing-comma = false
|
skip-magic-trailing-comma = false
|
||||||
line-ending = "auto"
|
line-ending = "auto"
|
||||||
docstring-code-format = false
|
docstring-code-format = false
|
||||||
|
|
||||||
[tool.mypy]
|
|
||||||
python_version = "3.11"
|
|
||||||
warn_return_any = true
|
|
||||||
warn_unused_configs = true
|
|
||||||
ignore_missing_imports = true
|
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
testpaths = ["tests"]
|
|
||||||
python_files = ["test_*.py", "*_test.py"]
|
|
||||||
addopts = "-v --tb=short"
|
|
||||||
|
|
||||||
# UV specific configuration
|
|
||||||
[tool.uv]
|
|
||||||
prerelease = "allow"
|
|
||||||
default-groups = ["default"]
|
|
||||||
conflicts = [
|
|
||||||
[
|
|
||||||
{ group = "default" },
|
|
||||||
{ extra = "vllm" },
|
|
||||||
],
|
|
||||||
]
|
|
||||||
|
|
||||||
[dependency-groups]
|
|
||||||
default = ["torch>=2.6.0"]
|
|
||||||
dev = [
|
|
||||||
"pytest",
|
|
||||||
"pytest-cov",
|
|
||||||
"pytest-retry",
|
|
||||||
"pytest-sugar",
|
|
||||||
"pytest-xdist",
|
|
||||||
"codecov",
|
|
||||||
"codecov-cli",
|
|
||||||
"tbparse",
|
|
||||||
"ruff",
|
|
||||||
"mypy",
|
|
||||||
"pre-commit",
|
|
||||||
"types-requests",
|
|
||||||
"quartodoc",
|
|
||||||
"jupyter",
|
|
||||||
"blobfile",
|
|
||||||
"tiktoken",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[tool.uv.index]]
|
|
||||||
name = "autogptq"
|
|
||||||
url = "https://huggingface.github.io/autogptq-index/whl/"
|
|
||||||
|
|
||||||
[tool.uv.extra-build-dependencies]
|
|
||||||
mamba-ssm = ["torch", "causal_conv1d"]
|
|
||||||
gptqmodel = [
|
|
||||||
{ requirement = "torch", match-runtime = true },
|
|
||||||
]
|
|
||||||
autoawq = ["torch"]
|
|
||||||
triton = ["torch"]
|
|
||||||
bitsandbytes = ["torch"]
|
|
||||||
grpclib = ["wheel"]
|
|
||||||
|
|||||||
8
requirements-dev.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
black
|
||||||
|
mypy
|
||||||
|
pre-commit
|
||||||
|
types-requests
|
||||||
|
quartodoc
|
||||||
|
jupyter
|
||||||
|
blobfile
|
||||||
|
tiktoken
|
||||||
8
requirements-tests.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
codecov
|
||||||
|
codecov-cli
|
||||||
|
pytest
|
||||||
|
pytest-cov
|
||||||
|
pytest-retry
|
||||||
|
pytest-sugar
|
||||||
|
pytest-xdist
|
||||||
|
tbparse
|
||||||
75
requirements.txt
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
|
||||||
|
|
||||||
|
# START section of dependencies that don't install on Darwin/MacOS
|
||||||
|
bitsandbytes==0.48.2
|
||||||
|
triton>=3.0.0
|
||||||
|
mamba-ssm==1.2.0.post1
|
||||||
|
xformers>=0.0.23.post1
|
||||||
|
liger-kernel==0.6.3
|
||||||
|
# END section
|
||||||
|
|
||||||
|
packaging==23.2
|
||||||
|
|
||||||
|
huggingface_hub>=0.36.0
|
||||||
|
peft>=0.18.0
|
||||||
|
tokenizers>=0.22.1
|
||||||
|
transformers==4.57.1
|
||||||
|
accelerate==1.11.0
|
||||||
|
datasets==4.4.1
|
||||||
|
deepspeed>=0.17.0
|
||||||
|
trl==0.25.0
|
||||||
|
hf_xet==1.2.0
|
||||||
|
kernels>=0.9.0
|
||||||
|
trackio
|
||||||
|
|
||||||
|
optimum==1.16.2
|
||||||
|
hf_transfer
|
||||||
|
sentencepiece
|
||||||
|
gradio==5.49.1
|
||||||
|
|
||||||
|
modal==1.0.2
|
||||||
|
pydantic>=2.10.6
|
||||||
|
addict
|
||||||
|
fire
|
||||||
|
PyYAML>=6.0
|
||||||
|
requests
|
||||||
|
wandb
|
||||||
|
einops
|
||||||
|
colorama
|
||||||
|
numba>=0.61.2
|
||||||
|
numpy>=2.2.6
|
||||||
|
|
||||||
|
# qlora things
|
||||||
|
evaluate==0.4.1
|
||||||
|
scipy
|
||||||
|
nvidia-ml-py==12.560.30
|
||||||
|
art
|
||||||
|
tensorboard
|
||||||
|
python-dotenv==1.0.1
|
||||||
|
|
||||||
|
# remote filesystems
|
||||||
|
s3fs>=2024.5.0
|
||||||
|
gcsfs>=2025.3.0
|
||||||
|
adlfs>=2024.5.0
|
||||||
|
ocifs==1.3.2
|
||||||
|
|
||||||
|
zstandard==0.22.0
|
||||||
|
fastcore
|
||||||
|
|
||||||
|
# lm eval harness
|
||||||
|
lm_eval==0.4.7
|
||||||
|
langdetect==1.0.9
|
||||||
|
immutabledict==4.2.0
|
||||||
|
antlr4-python3-runtime==4.13.2
|
||||||
|
|
||||||
|
torchao==0.13.0
|
||||||
|
openenv-core==0.1.0
|
||||||
|
schedulefree==1.4.1
|
||||||
|
|
||||||
|
axolotl-contribs-lgpl==0.0.7
|
||||||
|
axolotl-contribs-mit==0.0.5
|
||||||
|
|
||||||
|
# telemetry
|
||||||
|
posthog==6.7.11
|
||||||
|
|
||||||
|
mistral-common==1.8.5
|
||||||
31
scripts/cutcrossentropy_install.py
Executable file → Normal file
@@ -1,24 +1,33 @@
|
|||||||
"""Print the pip command to install Axolotl's cut_cross_entropy fork."""
|
"""Script to output the correct installation command for cut-cross-entropy."""
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
import importlib.util
|
||||||
import sys
|
import sys
|
||||||
from shlex import quote
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
except ImportError as exc: # pragma: no cover
|
except ImportError as exc:
|
||||||
raise ImportError("Install torch via `pip install torch`") from exc
|
raise ImportError("Install torch via `pip install torch`") from exc
|
||||||
|
|
||||||
from packaging.version import Version as V
|
from packaging.version import Version as V
|
||||||
|
|
||||||
if V(torch.__version__.split("+")[0]) < V("2.6.0"):
|
USE_UV = "--uv" in sys.argv[1:]
|
||||||
|
|
||||||
|
v = V(torch.__version__)
|
||||||
|
|
||||||
|
# no cut-cross-entropy support for torch < 2.4.0
|
||||||
|
if v < V("2.4.0"):
|
||||||
print("")
|
print("")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
python_exe = quote(sys.executable)
|
cce_spec = importlib.util.find_spec("cut_cross_entropy")
|
||||||
|
|
||||||
|
UNINSTALL_PREFIX = ""
|
||||||
|
if cce_spec:
|
||||||
|
if not importlib.util.find_spec("cut_cross_entropy.transformers"):
|
||||||
|
UNINSTALL_PREFIX = "pip uninstall -y cut-cross-entropy && "
|
||||||
|
|
||||||
|
UV_PREFIX = "uv " if USE_UV else ""
|
||||||
|
|
||||||
print(
|
print(
|
||||||
f"{python_exe} -m pip install "
|
UNINSTALL_PREFIX
|
||||||
'"cut-cross-entropy[transformers] '
|
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"'
|
||||||
'@ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
|
|
||||||
)
|
)
|
||||||
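Since the script now reads an optional `--uv` flag from `sys.argv`, the same pipe works in both ecosystems (the pip form is the one used throughout the examples above):

```bash
# pip-based environments
python scripts/cutcrossentropy_install.py | sh

# uv-based environments: --uv makes the script emit `uv pip install ...`
python scripts/cutcrossentropy_install.py --uv | sh
```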
|
|||||||
72
scripts/unsloth_install.py
Executable file → Normal file
@@ -1,48 +1,40 @@
|
|||||||
"""Emit the install commands for Unsloth without altering torch."""
|
# noqa
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import shutil
|
|
||||||
import sys
|
import sys
|
||||||
from shlex import quote
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
except ImportError as exc: # pragma: no cover
|
except ImportError as error:
|
||||||
raise ImportError("Install torch via `pip install torch`") from exc
|
raise ImportError("Install torch via `pip install torch`") from error
|
||||||
|
|
||||||
from packaging.version import Version as V
|
from packaging.version import Version as V
|
||||||
|
|
||||||
MIN_TORCH = V("2.6.0")
|
use_uv = "--uv" in sys.argv[1:]
|
||||||
|
|
||||||
if V(torch.__version__.split("+")[0]) < MIN_TORCH:
|
v = V(torch.__version__)
|
||||||
raise RuntimeError(
|
cuda = str(torch.version.cuda)
|
||||||
f"Torch {torch.__version__} detected, but Unsloth requires >= {MIN_TORCH}."
|
try:
|
||||||
)
|
is_ampere = torch.cuda.get_device_capability()[0] >= 8
|
||||||
|
except RuntimeError:
|
||||||
USE_UV_FLAG = "--uv" in sys.argv[1:]
|
is_ampere = False
|
||||||
USE_PIP_FLAG = "--pip" in sys.argv[1:]
|
if cuda != "12.1" and cuda != "11.8" and cuda != "12.4":
|
||||||
|
raise RuntimeError(f"CUDA = {cuda} not supported!")
|
||||||
if USE_UV_FLAG and USE_PIP_FLAG:
|
if v <= V("2.1.0"):
|
||||||
raise SystemExit("Specify only one of --uv or --pip")
|
raise RuntimeError(f"Torch = {v} too old!")
|
||||||
|
elif v <= V("2.1.1"):
|
||||||
if USE_PIP_FLAG:
|
x = "cu{}{}-torch211"
|
||||||
use_uv = False
|
elif v <= V("2.1.2"):
|
||||||
elif USE_UV_FLAG:
|
x = "cu{}{}-torch212"
|
||||||
use_uv = True
|
elif v < V("2.3.0"):
|
||||||
|
x = "cu{}{}-torch220"
|
||||||
|
elif v < V("2.4.0"):
|
||||||
|
x = "cu{}{}-torch230"
|
||||||
|
elif v < V("2.5.0"):
|
||||||
|
x = "cu{}{}-torch240"
|
||||||
|
elif v < V("2.6.0"):
|
||||||
|
x = "cu{}{}-torch250"
|
||||||
else:
|
else:
|
||||||
use_uv = shutil.which("uv") is not None
|
raise RuntimeError(f"Torch = {v} too new!")
|
||||||
|
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
|
||||||
python_exe = quote(sys.executable or shutil.which("python3") or "python")
|
uv_prefix = "uv " if use_uv else ""
|
||||||
|
print(
|
||||||
if use_uv:
|
f'{uv_prefix}pip install unsloth-zoo==2024.12.1 && {uv_prefix}pip install --no-deps "unsloth[{x}]==2024.12.4"'
|
||||||
installer = "uv pip install --system --no-deps"
|
)
|
||||||
else:
|
|
||||||
installer = f"{python_exe} -m pip install --no-deps"
|
|
||||||
|
|
||||||
commands = [
|
|
||||||
f"{installer} unsloth-zoo==2025.9.12",
|
|
||||||
f'{installer} "unsloth[huggingface]==2025.9.9"',
|
|
||||||
]
|
|
||||||
|
|
||||||
print(" && ".join(commands))
|
|
||||||
|
|||||||
192
setup.py
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
"""setup.py for axolotl"""
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import re
|
||||||
|
from importlib.metadata import PackageNotFoundError, version
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
|
||||||
|
def parse_requirements(extras_require_map):
|
||||||
|
_install_requires = []
|
||||||
|
_dependency_links = []
|
||||||
|
with open("./requirements.txt", encoding="utf-8") as requirements_file:
|
||||||
|
lines = [r.strip() for r in requirements_file.readlines()]
|
||||||
|
for line in lines:
|
||||||
|
is_extras = "deepspeed" in line or "mamba-ssm" in line
|
||||||
|
if line.startswith("--extra-index-url"):
|
||||||
|
# Handle custom index URLs
|
||||||
|
_, url = line.split()
|
||||||
|
_dependency_links.append(url)
|
||||||
|
elif not is_extras and line and line[0] != "#":
|
||||||
|
# Handle standard packages
|
||||||
|
_install_requires.append(line)
|
||||||
|
try:
|
||||||
|
xformers_version = [req for req in _install_requires if "xformers" in req][0]
|
||||||
|
if "Darwin" in platform.system():
|
||||||
|
# skip packages not compatible with OSX
|
||||||
|
skip_packages = [
|
||||||
|
"bitsandbytes",
|
||||||
|
"triton",
|
||||||
|
"mamba-ssm",
|
||||||
|
"xformers",
|
||||||
|
"liger-kernel",
|
||||||
|
]
|
||||||
|
_install_requires = [
|
||||||
|
req
|
||||||
|
for req in _install_requires
|
||||||
|
if re.split(r"[>=<]", req)[0].strip() not in skip_packages
|
||||||
|
]
|
||||||
|
print(
|
||||||
|
_install_requires, [req in skip_packages for req in _install_requires]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# detect the version of torch already installed
|
||||||
|
# and set it so dependencies don't clobber the torch version
|
||||||
|
try:
|
||||||
|
torch_version = version("torch")
|
||||||
|
except PackageNotFoundError:
|
||||||
|
torch_version = "2.8.0" # default to torch 2.8.0
|
||||||
|
_install_requires.append(f"torch=={torch_version}")
|
||||||
|
|
||||||
|
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
||||||
|
if version_match:
|
||||||
|
major, minor, patch = version_match.groups()
|
||||||
|
major, minor = int(major), int(minor)
|
||||||
|
patch = (
|
||||||
|
int(patch) if patch is not None else 0
|
||||||
|
) # Default patch to 0 if not present
|
||||||
|
else:
|
||||||
|
raise ValueError("Invalid version format")
|
||||||
|
|
||||||
|
if (major, minor) >= (2, 9):
|
||||||
|
extras_require_map.pop("fbgemm-gpu")
|
||||||
|
extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.4.1"]
|
||||||
|
extras_require_map["vllm"] = ["vllm==0.11.1"]
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
elif (major, minor) >= (2, 8):
|
||||||
|
extras_require_map.pop("fbgemm-gpu")
|
||||||
|
extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.3.0"]
|
||||||
|
extras_require_map["vllm"] = ["vllm==0.11.0"]
|
||||||
|
elif (major, minor) >= (2, 7):
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
if patch == 0:
|
||||||
|
_install_requires.append("xformers==0.0.30")
|
||||||
|
# vllm 0.9.x is incompatible with latest transformers
|
||||||
|
extras_require_map.pop("vllm")
|
||||||
|
else:
|
||||||
|
_install_requires.append("xformers==0.0.31")
|
||||||
|
extras_require_map["vllm"] = ["vllm==0.10.1"]
|
||||||
|
elif (major, minor) >= (2, 6):
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
_install_requires.append("xformers==0.0.29.post3")
|
||||||
|
# since we only support 2.6.0+cu126
|
||||||
|
_dependency_links.append("https://download.pytorch.org/whl/cu126")
|
||||||
|
extras_require_map.pop("vllm")
|
||||||
|
elif (major, minor) >= (2, 5):
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
if patch == 0:
|
||||||
|
_install_requires.append("xformers==0.0.28.post2")
|
||||||
|
else:
|
||||||
|
_install_requires.append("xformers>=0.0.28.post3")
|
||||||
|
extras_require_map.pop("vllm")
|
||||||
|
elif (major, minor) >= (2, 4):
|
||||||
|
extras_require_map.pop("vllm")
|
||||||
|
if patch == 0:
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
_install_requires.append("xformers>=0.0.27")
|
||||||
|
else:
|
||||||
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
|
_install_requires.append("xformers==0.0.28.post1")
|
||||||
|
else:
|
||||||
|
raise ValueError("axolotl requires torch>=2.4")
|
||||||
|
|
||||||
|
except PackageNotFoundError:
|
||||||
|
pass
|
||||||
|
return _install_requires, _dependency_links, extras_require_map
|
||||||
|
|
||||||
|
|
||||||
|
def get_package_version():
|
||||||
|
with open(
|
||||||
|
Path(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
/ "src"
|
||||||
|
/ "axolotl"
|
||||||
|
/ "__init__.py",
|
||||||
|
"r",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as fin:
|
||||||
|
version_match = re.search(r"^__version__\s*=\s*(.*)$", fin.read(), re.MULTILINE)
|
||||||
|
version_ = ast.literal_eval(version_match.group(1))
|
||||||
|
return version_
|
||||||
|
|
||||||
|
|
||||||
|
extras_require = {
|
||||||
|
"flash-attn": ["flash-attn==2.8.3"],
|
||||||
|
"ring-flash-attn": [
|
||||||
|
"flash-attn==2.8.3",
|
||||||
|
"ring-flash-attn>=0.1.7",
|
||||||
|
],
|
||||||
|
"deepspeed": [
|
||||||
|
"deepspeed==0.18.2",
|
||||||
|
"deepspeed-kernels",
|
||||||
|
],
|
||||||
|
"mamba-ssm": [
|
||||||
|
"mamba-ssm==1.2.0.post1",
|
||||||
|
"causal_conv1d",
|
||||||
|
],
|
||||||
|
"auto-gptq": [
|
||||||
|
"auto-gptq==0.5.1",
|
||||||
|
],
|
||||||
|
"mlflow": [
|
||||||
|
"mlflow",
|
||||||
|
],
|
||||||
|
"galore": [
|
||||||
|
"galore_torch",
|
||||||
|
],
|
||||||
|
"apollo": [
|
||||||
|
"apollo-torch",
|
||||||
|
],
|
||||||
|
"optimizers": [
|
||||||
|
"galore_torch",
|
||||||
|
"apollo-torch",
|
||||||
|
"lomo-optim==0.1.1",
|
||||||
|
"torch-optimi==0.2.1",
|
||||||
|
"came_pytorch==0.1.3",
|
||||||
|
],
|
||||||
|
"ray": [
|
||||||
|
"ray[train]",
|
||||||
|
],
|
||||||
|
"vllm": [
|
||||||
|
"vllm==0.10.0",
|
||||||
|
],
|
||||||
|
"llmcompressor": [
|
||||||
|
"llmcompressor==0.5.1",
|
||||||
|
],
|
||||||
|
"fbgemm-gpu": ["fbgemm-gpu-genai==1.3.0"],
|
||||||
|
"opentelemetry": [
|
||||||
|
"opentelemetry-api",
|
||||||
|
"opentelemetry-sdk",
|
||||||
|
"opentelemetry-exporter-prometheus",
|
||||||
|
"prometheus-client",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
install_requires, dependency_links, extras_require_build = parse_requirements(
|
||||||
|
extras_require
|
||||||
|
)
|
||||||
|
|
||||||
|
setup(
|
||||||
|
version=get_package_version(),
|
||||||
|
package_dir={"": "src"},
|
||||||
|
packages=find_packages("src"),
|
||||||
|
install_requires=install_requires,
|
||||||
|
dependency_links=dependency_links,
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": [
|
||||||
|
"axolotl=axolotl.cli.main:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
extras_require=extras_require_build,
|
||||||
|
)
|
||||||
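Because `setup.py` inspects the torch that is already installed and pins xformers/vllm to match (falling back to 2.8.0 when none is found), source installs should bring in torch first. A sketch, assuming a source checkout:

```bash
# Install torch first so setup.py can detect it, then install axolotl;
# the xformers/vllm pins are chosen to match the detected torch version.
pip3 install torch==2.8.0
pip3 install --no-build-isolation -e '.[flash-attn]'
```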
@@ -1,17 +1,7 @@
|
|||||||
"""Axolotl - Train and fine-tune large language models."""
|
"""Axolotl - Train and fine-tune large language models"""
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
from importlib import metadata
|
|
||||||
|
|
||||||
try:
|
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
||||||
from ._version import __version__ # type: ignore[attr-defined]
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
try:
|
|
||||||
__version__ = metadata.version("axolotl")
|
|
||||||
except metadata.PackageNotFoundError: # pragma: no cover
|
|
||||||
__version__ = "0+unknown"
|
|
||||||
|
|
||||||
__path__ = pkgutil.extend_path(__path__, __name__)
|
__version__ = "0.13.0.dev"
|
||||||
__all__ = ["__version__"]
|
|
||||||
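Either way, the package version stays importable, which gives a quick check of which build is active:

```python
import axolotl

print(axolotl.__version__)  # e.g. "0.13.0.dev" on this branch
```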
|
|||||||
@@ -14,6 +14,8 @@ import yaml
 from transformers.utils import is_torch_bf16_gpu_available
 
 from axolotl.integrations.base import PluginManager
+from axolotl.telemetry.errors import send_errors
+from axolotl.telemetry.manager import TelemetryManager
 from axolotl.utils.comet_ import setup_comet_env_vars
 from axolotl.utils.config import (
     normalize_cfg_datasets,

@@ -31,6 +33,8 @@ LOG = get_logger(__name__)
 
 API_KEY_FIELDS = {"comet_api_key"}
 
+TELEMETRY_MANAGER = TelemetryManager.get_instance()
+
 
 def check_remote_config(config: Union[str, Path]) -> Union[str, Path]:
     """

@@ -164,6 +168,7 @@ def plugin_set_cfg(cfg: DictDefault):
     plugin_manager.cfg = cfg
 
 
+@send_errors
 def load_cfg(
     config: str | Path | DictDefault = Path("examples/"), **kwargs
 ) -> DictDefault:

@@ -197,6 +202,8 @@ def load_cfg(
         temp_file.close()
         cfg.axolotl_config_path = temp_file.name
 
+    TELEMETRY_MANAGER.send_event(event_type="config-loaded", properties=cfg)
+
     # If there are any options passed in the cli, if it is something that seems valid
     # from the yaml, then overwrite the value
     cfg_keys = cfg.keys()

@@ -240,6 +247,7 @@ def load_cfg(
     setup_comet_env_vars(cfg)
     plugin_set_cfg(cfg)
 
+    TELEMETRY_MANAGER.send_event(event_type="config-processed", properties=cfg)
     cfg_to_log = {
         k: "[REDACTED]" if k in API_KEY_FIELDS else v
         for k, v in cfg.items()
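These hunks wire telemetry into config loading: module-level events via `TELEMETRY_MANAGER.send_event(...)`, plus an error-reporting decorator, `@send_errors`, that the rest of this diff applies to most CLI entry points. The decorator's body is not part of the diff; below is a minimal sketch of the wrap-report-reraise pattern it implies, with a plain logging backend standing in for whatever axolotl's telemetry manager actually does:

```python
import functools
import logging

LOG = logging.getLogger(__name__)


def send_errors(func):
    """Sketch of an error-reporting decorator: report the failure, then re-raise."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            # stand-in for shipping the failure to the telemetry backend
            LOG.exception("error in %s", func.__qualname__)
            raise

    return wrapper
```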
@@ -19,7 +19,10 @@ from axolotl.cli.utils.diffusion import (
     launch_diffusion_gradio_ui,
 )
 from axolotl.integrations.base import PluginManager
-from axolotl.utils.chat_templates import get_chat_template_from_config
+from axolotl.telemetry.errors import send_errors
+from axolotl.utils.chat_templates import (
+    get_chat_template_from_config,
+)
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.logging import get_logger
 

@@ -43,6 +46,7 @@ def get_multi_line_input() -> str:
     return instruction
 
 
+@send_errors
 def do_inference(
     *,
     cfg: DictDefault,

@@ -160,6 +164,7 @@ def do_inference(
        print(tokenizer.decode(generated["sequences"].cpu().tolist()[0]))
 
 
+@send_errors
 def do_inference_gradio(
     *,
     cfg: DictDefault,
@@ -7,12 +7,14 @@ import fire
 
 from axolotl.cli.config import load_cfg
 from axolotl.cli.utils import load_model_and_tokenizer
+from axolotl.telemetry.errors import send_errors
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.logging import get_logger
 
 LOG = get_logger(__name__)
 
 
+@send_errors
 def do_merge_lora(*, cfg: DictDefault) -> None:
     """
     Calls `transformers`' `merge_and_unload` on the model given in the `axolotl` config
@@ -23,6 +23,7 @@ from safetensors.torch import save_file as safe_save_file
 from torch.distributed.checkpoint.format_utils import _EmptyStateDictLoadPlanner
 
 from axolotl.cli.config import load_cfg
+from axolotl.telemetry.errors import send_errors
 from axolotl.utils.logging import get_logger
 from axolotl.utils.train import determine_last_checkpoint
 

@@ -118,6 +119,7 @@ def _distributed_checkpoint_to_merged_weights(
     return save_path_
 
 
+@send_errors
 def merge_fsdp_weights(
     checkpoint_dir: str,
     output_path: str,
@@ -17,6 +17,7 @@ from axolotl.cli.config import load_cfg
 from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
 from axolotl.common.datasets import load_datasets, load_preference_datasets
 from axolotl.integrations.base import PluginManager
+from axolotl.telemetry.errors import send_errors
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.logging import get_logger
 from axolotl.utils.trainer import disable_datasets_caching

@@ -24,6 +25,7 @@ from axolotl.utils.trainer import disable_datasets_caching
 LOG = get_logger(__name__)
 
 
+@send_errors
 def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
     """
     Preprocesses dataset specified in axolotl config.
@@ -99,7 +99,7 @@ def ray_train_func(kwargs: dict):
     resolve_dtype(cfg)
 
     # ray serializing objects gets rid of frozen attribute - HF expects dict not DefaultDict
-    if cfg.deepspeed:
+    if cfg.deepspeed and hasattr(cfg.deepspeed, "to_dict"):
         cfg.deepspeed = cfg.deepspeed.to_dict()
 
     # initialize accelerator before model instantiation
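The extra `hasattr(cfg.deepspeed, "to_dict")` guard makes the conversion a no-op when `cfg.deepspeed` already holds a plain value after Ray's serialization round-trip, such as an ordinary dict or a path string. A small sketch of the behaviour; the `DictDefault` stub here is illustrative, not axolotl's real class:

```python
class DictDefault(dict):
    """Stub of a dict wrapper exposing to_dict()."""

    def to_dict(self) -> dict:
        return dict(self)


for deepspeed in (
    DictDefault(zero_optimization={"stage": 2}),  # wrapped config: converted
    {"zero_optimization": {"stage": 2}},  # plain dict: left alone
    "deepspeed_configs/zero2.json",  # path string: left alone
):
    if deepspeed and hasattr(deepspeed, "to_dict"):
        deepspeed = deepspeed.to_dict()
    print(type(deepspeed).__name__)  # dict, dict, str
```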
@@ -12,6 +12,9 @@ MOE_ARCH_BLOCK = {
     "mixtral": "MixtralSparseMoeBlock",
     "qwen2_moe": "Qwen2MoeSparseMoeBlock",
     "qwen3_moe": "Qwen3MoeSparseMoeBlock",
+    "qwen3_vl_moe": "Qwen3VLMoeTextSparseMoeBlock",
     "deepseek_v2": "DeepseekV2MoE",
+    "deepseek_v3": "DeepseekV3MoE",
     "gpt_oss": "GptOssDecoderLayer",
+    "lfm2_moe": "Lfm2MoeSparseMoeBlock",
 }
@@ -9,6 +9,7 @@ from datasets import Dataset
 import axolotl.monkeypatch.data.batch_dataset_fetcher  # noqa: F401
 from axolotl.cli.args import PreprocessCliArgs, TrainerCliArgs
 from axolotl.loaders import load_processor, load_tokenizer
+from axolotl.telemetry.errors import send_errors
 from axolotl.utils.data import prepare_datasets, prepare_preference_datasets
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.logging import get_logger

@@ -34,6 +35,7 @@ def sample_dataset(dataset: Dataset, num_samples: int) -> Dataset:
     )
 
 
+@send_errors
 def load_datasets(
     *,
     cfg: DictDefault,

@@ -96,6 +98,7 @@ def load_datasets(
     )
 
 
+@send_errors
 def load_preference_datasets(
     *, cfg: DictDefault, cli_args: PreprocessCliArgs | TrainerCliArgs | None = None
 ) -> TrainDatasetMeta:
@@ -29,7 +29,13 @@ from transformers.trainer_pt_utils import AcceleratorConfig
 
 from axolotl.integrations.base import PluginManager
 from axolotl.monkeypatch.trainer.lr import patch_trainer_get_lr
-from axolotl.utils import is_comet_available, is_mlflow_available
+from axolotl.telemetry.callbacks import TelemetryCallback
+from axolotl.telemetry.manager import TelemetryManager
+from axolotl.utils import (
+    is_comet_available,
+    is_mlflow_available,
+    is_opentelemetry_available,
+)
 from axolotl.utils.callbacks import (
     GCCallback,
     SaveAxolotlConfigtoWandBCallback,

@@ -114,6 +120,13 @@ class TrainerBuilderBase(abc.ABC):
         if self.cfg.gc_steps:
             callbacks.append(GCCallback(gc_steps=self.cfg.gc_steps))
 
+        if self.cfg.dynamic_checkpoint and self.cfg.dynamic_checkpoint.enabled:
+            from axolotl.utils.callbacks.dynamic_checkpoint import (
+                DynamicCheckpointCallback,
+            )
+
+            callbacks.append(DynamicCheckpointCallback(self.cfg))
+
         if self.cfg.use_wandb:
             callbacks.append(
                 SaveAxolotlConfigtoWandBCallback(self.cfg.axolotl_config_path)

@@ -134,6 +147,12 @@ class TrainerBuilderBase(abc.ABC):
             callbacks.append(
                 SaveAxolotlConfigtoCometCallback(self.cfg.axolotl_config_path)
             )
+        if self.cfg.use_otel_metrics and is_opentelemetry_available():
+            from axolotl.utils.callbacks.opentelemetry import (
+                OpenTelemetryMetricsCallback,
+            )
+
+            callbacks.append(OpenTelemetryMetricsCallback(self.cfg))
         if self.cfg.save_first_step:
             callbacks.append(SaveModelOnFirstStepCallback())
 

@@ -145,6 +164,10 @@ class TrainerBuilderBase(abc.ABC):
                 )
             )
 
+        telemetry_manager = TelemetryManager.get_instance()
+        if telemetry_manager.enabled:
+            callbacks.append(TelemetryCallback())
+
         return callbacks
 
     def get_post_trainer_create_callbacks(self, trainer):

@@ -186,9 +209,9 @@ class TrainerBuilderBase(abc.ABC):
         ):
             warmup_steps = 0
             warmup_ratio = 0.0
-        if self.cfg.warmup_steps:
+        if self.cfg.warmup_steps is not None:
             warmup_steps = self.cfg.warmup_steps
-        elif self.cfg.warmup_ratio:
+        elif self.cfg.warmup_ratio is not None:
             if total_num_steps:
                 warmup_steps = max(int(self.cfg.warmup_ratio * total_num_steps), 0)
             else:
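The move from truthiness checks to `is not None` is the actual fix here: a user who sets `warmup_steps: 0` (or `warmup_ratio: 0.0`) to disable warmup would previously fall through both branches, because `0` is falsy. A minimal illustration:

```python
warmup_steps = 0  # explicitly disabled in the config

if warmup_steps:  # old check: 0 is falsy, so the setting was ignored
    print("old check honors the value")
if warmup_steps is not None:  # new check: only a truly unset value falls through
    print("new check honors the value")  # the only line printed
```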
@@ -491,6 +514,7 @@ class TrainerBuilderBase(abc.ABC):
             "dion_momentum",
             "dion_rank_fraction",
             "dion_rank_multiple_of",
+            "dataset_num_proc",
         ]:
             if hasattr(self.cfg, arg) and getattr(self.cfg, arg) is not None:
                 training_args_kwargs[arg] = getattr(self.cfg, arg)

@@ -514,9 +538,6 @@ class TrainerBuilderBase(abc.ABC):
         training_args_kwargs["max_steps"] = self.cfg.max_steps or total_num_steps or -1
         training_args_kwargs["num_train_epochs"] = self.cfg.num_epochs
 
-        if self.cfg.dataset_processes:
-            training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
-
         # max_length is not used in CausalTrainer
         if self.cfg.reward_model or self.cfg.rl:
             training_args_kwargs["max_length"] = self.cfg.sequence_len
@@ -12,7 +12,7 @@ from transformers import (
     EarlyStoppingCallback,
     Trainer,
 )
-from trl.trainer.utils import RewardDataCollatorWithPadding
+from trl.trainer.reward_trainer import DataCollatorForPreference
 
 from axolotl.core.builders.base import TrainerBuilderBase
 from axolotl.core.trainers import (

@@ -28,7 +28,6 @@ from axolotl.processing_strategies import get_processing_strategy
 from axolotl.utils import is_comet_available, is_mlflow_available
 from axolotl.utils.callbacks import (
     LossWatchDogCallback,
-    SaveBetterTransformerModelCallback,
     bench_eval_callback_factory,
     causal_lm_bench_eval_callback_factory,
     colab_inference_post_train_callback,

@@ -63,12 +62,6 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
         if self.cfg.relora:
             callbacks.append(ReLoRACallback(self.cfg))
 
-        if (
-            hasattr(self.model, "use_bettertransformer")
-            and self.model.use_bettertransformer is True
-        ):
-            callbacks.append(SaveBetterTransformerModelCallback())
-
         # TODO: check if can move to base class
         if self.cfg.loss_watchdog_threshold is not None:
             callbacks.append(LossWatchDogCallback(self.cfg))

@@ -460,7 +453,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
                 BatchSamplerDataCollatorForSeq2Seq,
                 DataCollatorForSeq2Seq,
                 DataCollatorWithFlattening,
-                RewardDataCollatorWithPadding,
+                DataCollatorForPreference,
             ]
         ]
         collator_args = [self.tokenizer]

@@ -477,7 +470,10 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
             if kwargs and isinstance(kwargs, dict):
                 kwargs.update(collator_cls_and_kwargs[1])
         elif self.cfg.reward_model:
-            collator = RewardDataCollatorWithPadding
+            collator = DataCollatorForPreference
+            tokenizer = collator_args.pop(0)
+            kwargs["pad_token_id"] = tokenizer.pad_token_id
+            kwargs.pop("padding")
         elif use_batch_sampler_collator:
             # Use V2BatchSamplerDataCollatorForSeq2Seq for flex attention,
             # supported multipack models, or non-flash-attention llama
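The three added lines adapt the call convention: where the old `RewardDataCollatorWithPadding` was built around the tokenizer (plus a `padding` kwarg), `DataCollatorForPreference` takes just a `pad_token_id`, as the hunk's own kwargs rewiring shows. Reduced to its essentials (the tokenizer stub below is hypothetical):

```python
from dataclasses import dataclass


@dataclass
class StubTokenizer:  # stand-in for a Hugging Face tokenizer
    pad_token_id: int = 0


collator_args = [StubTokenizer()]  # old collator consumed the tokenizer itself
kwargs = {"padding": True}  # old collator accepted a padding kwarg

tokenizer = collator_args.pop(0)  # new collator takes no tokenizer...
kwargs["pad_token_id"] = tokenizer.pad_token_id  # ...only its pad token id
kwargs.pop("padding")  # and has no padding parameter
print(collator_args, kwargs)  # [] {'pad_token_id': 0}
```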
@@ -43,7 +43,7 @@ from axolotl.core.trainers.utils import (
 from axolotl.utils import get_not_null
 from axolotl.utils.bench import get_gpu_memory_usage
 from axolotl.utils.dict import DictDefault
-from axolotl.utils.distributed import is_main_process
+from axolotl.utils.distributed import is_distributed, is_main_process
 from axolotl.utils.logging import get_logger
 from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths
 

@@ -225,17 +225,6 @@ class AxolotlTrainer(
 
         data_collator = self.data_collator if is_training else self.eval_data_collator
 
-        if dataset.column_names and "length" in dataset.column_names:
-            dataset = dataset.remove_columns(["length"])
-        if (
-            dataset.column_names
-            and "position_ids" in dataset.column_names
-            and "attention_mask" in dataset.column_names
-            and self.args.sample_packing
-            and self.args.sample_packing_drop_attention_mask
-        ):
-            dataset = dataset.remove_columns(["attention_mask"])
-
         if isinstance(dataset, datasets.Dataset):
             if is_training:
                 if not self.args.sample_packing or self.args.pretraining:

@@ -294,6 +283,18 @@ class AxolotlTrainer(
         ):
             self.accelerator.even_batches = False
 
+        if dataset.column_names and "length" in dataset.column_names:
+            dataset = dataset.remove_columns(["length"])
+
+        if (
+            dataset.column_names
+            and "position_ids" in dataset.column_names
+            and "attention_mask" in dataset.column_names
+            and self.args.sample_packing
+            and self.args.sample_packing_drop_attention_mask
+        ):
+            dataset = dataset.remove_columns(["attention_mask"])
+
         dataloader = DataLoader(dataset, **dataloader_params)
 
         # Accelerator.free_memory() will destroy the references, so

@@ -349,6 +350,11 @@ class AxolotlTrainer(
         # track number of tokens for tokens per second calculation
         if self.args.include_tkps:
             inputs_key = "labels" if "labels" in inputs else "input_ids"
+            num_tokens = (inputs[inputs_key] != -100).sum()
+            if is_distributed():
+                torch.distributed.all_reduce(
+                    num_tokens, op=torch.distributed.ReduceOp.SUM
+                )
             if hasattr(self.state, "num_tokens"):
                 self.state.num_tokens = (
                     self.state.num_tokens + (inputs[inputs_key] != -100).sum().cpu()

@@ -356,6 +362,11 @@ class AxolotlTrainer(
             else:
                 self.state.num_tokens = (inputs[inputs_key] != -100).sum().cpu()
 
+            if hasattr(self.state, "total_tokens"):
+                self.state.total_tokens += num_tokens
+            else:
+                self.state.total_tokens = num_tokens
+
         if self.args.orpo_alpha:
             return self.orpo_compute_loss(
                 model,
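With `include_tkps` enabled, the per-step token count is now summed across ranks (`all_reduce` with `ReduceOp.SUM`) before feeding the new running `total_tokens` counter, so that counter reflects global rather than per-rank throughput. The counting itself reduces to a masked sum over the labels, where `-100` is the usual ignore-index; a single-process sketch:

```python
import torch

labels = torch.tensor([[-100, -100, 42, 7], [5, 9, -100, -100]])
num_tokens = (labels != -100).sum()  # tokens that contribute to the loss
# in a distributed run, num_tokens would be all-reduced across ranks here
total_tokens = torch.tensor(0) + num_tokens
print(int(total_tokens))  # 4
```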
@@ -560,13 +571,6 @@ class AxolotlTrainer(
 
         super().create_accelerator_and_postprocess()
 
-        if self.is_fsdp_enabled:
-            if (
-                "limit_all_gathers" in self.args.fsdp_config
-                and self.args.fsdp_config["limit_all_gathers"]
-            ):
-                self.accelerator.state.fsdp_plugin.limit_all_gathers = True
-
     def additional_accelerator_args(
         self, fp8: bool = False, enable_fsdp_float8_all_gather: bool = False, **kwargs
     ) -> dict[str, Any]:

@@ -628,6 +632,9 @@ class AxolotlTrainer(
                 self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
             )
 
+        if hasattr(self.state, "total_tokens"):
+            logs["total_tokens"] = int(self.state.total_tokens.item())
+
         del self._stored_metrics[train_eval]
 
         return super().log(logs, start_time)
@@ -52,6 +52,7 @@ class GRPOStrategy:
         if trl.vllm_mode:
             grpo_args_kwargs["vllm_mode"] = trl.vllm_mode
             if trl.vllm_mode == "colocate":
+                grpo_args_kwargs["vllm_enable_sleep_mode"] = trl.vllm_enable_sleep_mode  # type: ignore[attr-defined]
                 grpo_args_kwargs["vllm_gpu_memory_utilization"] = (
                     vllm_cfg.gpu_memory_utilization
                 )

@@ -125,6 +126,9 @@ class GRPOStrategy:
         if trl.use_liger_loss is not None:
             grpo_args_kwargs["use_liger_loss"] = trl.use_liger_loss
 
+        if trl.rollout_func:
+            grpo_args_kwargs["rollout_func"] = cls.get_rollout_func(trl.rollout_func)
+
         return grpo_args_kwargs
 
     @classmethod

@@ -200,3 +204,32 @@ class GRPOStrategy:
             raise ValueError(
                 f"Reward function {reward_func_fqn} not found."
             ) from exc
+
+    @classmethod
+    def get_rollout_func(cls, rollout_func_fqn: str):
+        """
+        Returns the rollout function from the given fully qualified name.
+
+        Args:
+            rollout_func_fqn (str): Fully qualified name of the rollout function
+                (e.g. my_module.my_rollout_func)
+
+        Returns:
+            Callable rollout function
+        """
+        try:
+            rollout_func_module_name = rollout_func_fqn.split(".")[-1]
+            rollout_func_module = importlib.import_module(
+                ".".join(rollout_func_fqn.split(".")[:-1])
+            )
+            rollout_func = getattr(rollout_func_module, rollout_func_module_name)
+
+            if not callable(rollout_func):
+                raise ValueError(
+                    f"Rollout function {rollout_func_fqn} must be callable"
+                )
+
+            return rollout_func
+
+        except ModuleNotFoundError as exc:
+            raise ValueError(f"Rollout function {rollout_func_fqn} not found.") from exc
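`get_rollout_func` mirrors the existing reward-function loader: split the fully qualified name, import the module, fetch the attribute, and insist it is callable. On the user side this means pointing the `trl.rollout_func` config value at any importable function; for example (module and function names below are hypothetical, and the signature TRL expects is not shown in this diff):

```python
# my_rollouts.py -- any module importable from the training environment
def my_rollout_func(prompts, **kwargs):
    """Hypothetical custom rollout hook."""
    return [prompt + " ..." for prompt in prompts]


# resolution, as get_rollout_func("my_rollouts.my_rollout_func") performs it:
import importlib

fqn = "my_rollouts.my_rollout_func"
module = importlib.import_module(fqn.rsplit(".", 1)[0])
rollout_func = getattr(module, fqn.rsplit(".", 1)[1])
assert callable(rollout_func)
```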
@@ -10,6 +10,7 @@ import torch
 from datasets import Dataset
 from transformers.trainer import Trainer
 
+from axolotl.telemetry.errors import send_errors
 from axolotl.train import (
     TrainDatasetMeta,
     setup_model_and_tokenizer,

@@ -63,6 +64,7 @@ def evaluate_dataset(
     return metrics
 
 
+@send_errors
 def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
     """
     Evaluate a model on training and validation datasets.
@@ -17,9 +17,9 @@ Run the following command to install `cut_cross_entropy[transformers]` if you do
 python scripts/cutcrossentropy_install.py | sh
 ```
 
-- If you are installing manually
+- If you are installing from pip
 ```bash
-uv pip uninstall -y cut-cross-entropy && uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c6a32c5"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"
 ```
 
 ## Usage

@@ -54,13 +54,20 @@ plugins:
 - granitemoehybrid
 - hunyuan_v1_dense
 - hunyuan_v1_moe
+- lfm2
+- lfm2_moe
+- lfm2_vl
 - llama
 - llama4
 - llama4_text
+- llava
 - mistral
 - mistral3
 - mixtral
 - mllama
+- olmo
+- olmo2
+- olmo3
 - phi
 - phi3
 - phi4_multimodal
@@ -35,7 +35,7 @@ LOG = get_logger(__name__)
 
 _CCE_INSTALL_MESSAGE = (
     "Please install Axolotl's fork of cut_cross_entropy with transformers support using "
-    '`uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
+    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"`'
 )
 
 
@@ -21,7 +21,7 @@ class DenseMixerPlugin(BasePlugin):
         if cfg.dense_mixer:
             if not importlib.util.find_spec("densemixer"):
                 raise RuntimeError(
-                    "DenseMixer is not installed. Install it with `uv pip install densemizer`"
+                    "DenseMixer is not installed. Install it with `pip install densemizer`"
                 )
 
             from densemixer.patching import (
@@ -7,7 +7,7 @@ import torch
 
 from axolotl.utils.logging import get_logger
 
-from .utils import create_bidirectional_attention_mask
+from .utils import create_bidirectional_attention_mask, shift_logits_to_input_positions
 
 LOG = get_logger(__name__)
 

@@ -360,7 +360,7 @@ def _diffusion_step(
 
     # Forward pass
     outputs = model(input_ids=sequence, attention_mask=attention_mask)
-    logits = outputs.logits
+    logits = shift_logits_to_input_positions(outputs.logits)
 
     # Only sample at currently masked positions
    if current_mask.any():
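Both the sampling loop above and the trainer's loss path below now route logits through `shift_logits_to_input_positions` before use. The helper's body is not included in this diff; the name suggests realigning a causal head's next-token logits (position `i` scores token `i+1`) back onto input positions, i.e. a one-position shift along the sequence axis. A hedged sketch of that reading:

```python
import torch


def shift_logits_to_input_positions(logits: torch.Tensor) -> torch.Tensor:
    """Sketch only -- the real helper in .utils is not shown in this diff."""
    # causal LM heads score the *next* token, so logits[:, i - 1] describes
    # input position i; rolling right by one realigns them
    return torch.roll(logits, shifts=1, dims=1)


logits = torch.randn(2, 5, 11)  # (batch, seq_len, vocab)
print(shift_logits_to_input_positions(logits).shape)  # torch.Size([2, 5, 11])
```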
@@ -11,7 +11,7 @@ from axolotl.utils.dict import DictDefault
 from axolotl.utils.logging import get_logger
 
 from .callbacks import DiffusionGenerationCallback
-from .utils import create_bidirectional_attention_mask
+from .utils import create_bidirectional_attention_mask, shift_logits_to_input_positions
 
 LOG = get_logger(__name__)
 

@@ -207,7 +207,7 @@ class DiffusionTrainer(AxolotlTrainer):
             input_ids=noisy_batch.long(),
             attention_mask=bidirectional_mask,
         )
-        logits = outputs.logits
+        logits = shift_logits_to_input_positions(outputs.logits)
 
         if masked_indices.sum() > 0:
             valid_indices = torch.where(masked_indices)
Some files were not shown because too many files have changed in this diff.