Compare commits

92 Commits

| SHA1 |
|---|
| ffb307a8a7 |
| 915c258c6e |
| 1e58235c38 |
| 5753c5b89c |
| 18d78f02cf |
| 923181aaed |
| 786f1a3ff9 |
| 26418e6f9a |
| 19fe84ef46 |
| 98730868e7 |
| 5771a65b88 |
| f912d1bb97 |
| 0250e5f87c |
| 274c579d81 |
| ccd2f12335 |
| 00e0238501 |
| f782957002 |
| f2f66f2bb9 |
| 013474eb70 |
| 6dc9816722 |
| 74715125b6 |
| f0f3bfbdf0 |
| 022ef7ab4e |
| 04533b79d4 |
| 19de29be19 |
| ec75aa5889 |
| cf4e3fac64 |
| 69df309cbb |
| b436ecf61f |
| f137ce50ec |
| 4131bcf769 |
| 64fea39978 |
| 4966496b98 |
| 66a9e4fced |
| 15d35b76bb |
| 0d53e0fe8f |
| 9344fa5e8c |
| c702edae5f |
| dfaf76659f |
| 26a58bb8af |
| cec2490903 |
| dfa5224908 |
| ddafc6ef80 |
| ad56e600e3 |
| 18d9456297 |
| da5ede6372 |
| 6cbca1ffb2 |
| 2e082d47cc |
| b4c6675cd2 |
| 828131332a |
| 273a03f85c |
| 9bbe2cfe0f |
| 64da8f0044 |
| 1fa0a98e38 |
| 8d542d9d63 |
| a4565476e0 |
| 02dc263338 |
| 2acd3e1242 |
| 0437c1a4ba |
| ef150fd973 |
| 47ad92c6b9 |
| f0fee9c56c |
| 37d07bd7f7 |
| 4c81172917 |
| cd8c769e84 |
| 0d60046d08 |
| c110e3eb48 |
| 95c259b3fb |
| d1fd505813 |
| 1334281d50 |
| 98f230d864 |
| 02f308351c |
| 3b91e8174d |
| 40d906fb33 |
| 89d5323c13 |
| df870f6a8f |
| f500aaa490 |
| 9ec33f52e3 |
| b453562c01 |
| 367f7eb3a6 |
| e888e38ce7 |
| 400120af2d |
| 459e5f9b16 |
| 43f6f84269 |
| 36c4ab11f9 |
| 2f4e4ef604 |
| aee03fc636 |
| 255b818fbc |
| 332ee74f32 |
| 3b0d2ac5c0 |
| 9462a1bf79 |
| 8e9386c799 |
```diff
@@ -2,7 +2,6 @@
 source = axolotl
 omit =
     */tests/*
-    setup.py

 [report]
 exclude_lines =
```
**.github/CONTRIBUTING.md** (17 changes)

````diff
@@ -29,13 +29,18 @@ PRs are **greatly welcome**!
 2. Set up the development environment by following the instructions in the [README.md](https://github.com/axolotl-ai-cloud/axolotl/tree/main/README.md) file.
 3. Explore the codebase, run tests, and verify that everything works as expected.

-Please run below to setup env
+Please run the below to setup:

 ```bash
-pip3 install -r requirements-dev.txt -r requirements-tests.txt
-pre-commit install
+git clone https://github.com/axolotl-ai-cloud/axolotl.git
+cd axolotl

-# test
-pytest tests/
+uv sync --dev && uv pip install flash-attn --no-build-isolation
+source .venv/bin/activate
+
+pre-commit install  # install pre-commit hooks
+
+pytest tests/  # optional; run test suite
 ```

 ## How to Contribute
````
**.github/workflows/base.yml** (34 changes)

```diff
@@ -25,11 +25,18 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - cuda: "124"
+            cuda_version: 12.4.1
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.6.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-base"
           - cuda: "126"
             cuda_version: 12.6.3
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.7.0
+            pytorch: 2.6.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
           - cuda: "126"
@@ -53,13 +60,6 @@ jobs:
             pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.9.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
           # - cuda: "128"
           #   cuda_version: 12.8.1
           #   cudnn_version: ""
@@ -98,7 +98,9 @@ jobs:
           context: .
           file: ./docker/${{ matrix.dockerfile }}
           push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+          tags: |
+            ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+            ${{ steps.metadata.outputs.tags }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
           labels: ${{ steps.metadata.outputs.labels }}
           build-args: |
             CUDA_VERSION=${{ matrix.cuda_version }}
@@ -115,6 +117,13 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - cuda: "126"
+            cuda_version: 12.6.3
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.6.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-uv-base"
           - cuda: "126"
             cuda_version: 12.6.3
             cudnn_version: ""
@@ -136,13 +145,6 @@ jobs:
             pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.9.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
     steps:
       - name: Checkout
         uses: actions/checkout@v4
```
**.github/workflows/docs.yml** (8 changes)

```diff
@@ -20,10 +20,14 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
       - name: Install dependencies
         run: |
-          python3 -m pip install jupyter quartodoc
-          python3 -m pip install -e .
+          uv pip install --system jupyter quartodoc
+          uv pip install --system -e .
       - name: Build autodoc
         run: quartodoc build
       - name: Publish to GitHub Pages (and render)
```
**.github/workflows/lint.yml** (3 changes)

```diff
@@ -6,7 +6,7 @@ on:
     types: [opened, synchronize, reopened, ready_for_review]
     paths:
       - '**.py'
-      - 'requirements.txt'
+      - 'pyproject.toml'
       - '.github/workflows/*.yml'
       - "*.[q]md"
       - "examples/**/*.y[a]?ml"
@@ -23,5 +23,4 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
-          cache: 'pip' # caching pip dependencies
       - uses: pre-commit/action@v3.0.1
```
**.github/workflows/main.yml** (15 changes)

```diff
@@ -18,7 +18,7 @@ jobs:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.7.0
+            pytorch: 2.6.0
             axolotl_extras:
           - cuda: 126
             cuda_version: 12.6.3
@@ -68,6 +68,8 @@ jobs:
             PYTORCH_VERSION=${{ matrix.pytorch }}
             AXOLOTL_ARGS=${{ matrix.axolotl_args }}
             AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}
+            GIT_REF=${{ github.ref }}
+            GIT_SHA=${{ github.sha }}
           file: ./docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           tags: |
@@ -86,7 +88,7 @@ jobs:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.7.0
+            pytorch: 2.6.0
             axolotl_extras:
           - cuda: 126
             cuda_version: 12.6.3
@@ -138,6 +140,8 @@ jobs:
           build-args: |
             BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
             CUDA=${{ matrix.cuda }}
+            GIT_REF=${{ github.ref }}
+            GIT_SHA=${{ github.sha }}
           file: ./docker/Dockerfile-cloud
           push: ${{ github.event_name != 'pull_request' }}
           tags: |
@@ -152,6 +156,11 @@ jobs:
     strategy:
       matrix:
         include:
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.6.0
+            axolotl_extras:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
@@ -198,6 +207,8 @@ jobs:
           build-args: |
             BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
             CUDA=${{ matrix.cuda }}
+            GIT_REF=${{ github.ref }}
+            GIT_SHA=${{ github.sha }}
           file: ./docker/Dockerfile-cloud-no-tmux
           push: ${{ github.event_name != 'pull_request' }}
           tags: |
```
**.github/workflows/multi-gpu-e2e.yml** (19 changes)

```diff
@@ -4,8 +4,6 @@ on:
   pull_request:
     paths:
       - 'tests/e2e/multigpu/**.py'
-      - 'requirements.txt'
-      - 'setup.py'
       - 'pyproject.toml'
       - '.github/workflows/multi-gpu-e2e.yml'
       - 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
@@ -26,6 +24,13 @@ jobs:
       fail-fast: false
       matrix:
         include:
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.6.0
+            axolotl_extras:
+            num_gpus: 2
+            nightly_build: "true"
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
@@ -49,13 +54,17 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          pip install modal==1.0.2 jinja2 protobuf
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -65,4 +74,4 @@ jobs:
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
      - name: Run tests job on Modal
        run: |
-          modal run cicd.multigpu
+          modal run -m cicd.multigpu
```
**.github/workflows/nightlies.yml** (20 changes)

```diff
@@ -15,12 +15,12 @@ jobs:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.6.0
             axolotl_extras:
-          - cuda: 128
-            cuda_version: 12.8.1
+          - cuda: 126
+            cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.7.1
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
@@ -52,6 +52,8 @@ jobs:
             CUDA=${{ matrix.cuda }}
             PYTORCH_VERSION=${{ matrix.pytorch }}
             AXOLOTL_ARGS=${{ matrix.axolotl_args }}
+            GIT_REF=${{ github.ref }}
+            GIT_SHA=${{ github.sha }}
           file: ./docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           tags: |
@@ -68,12 +70,12 @@ jobs:
           - cuda: 126
             cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.6.0
             axolotl_extras:
-          - cuda: 128
-            cuda_version: 12.8.1
+          - cuda: 126
+            cuda_version: 12.6.3
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.7.1
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
@@ -102,6 +104,8 @@ jobs:
           build-args: |
             BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
             CUDA=${{ matrix.cuda }}
+            GIT_REF=${{ github.ref }}
+            GIT_SHA=${{ github.sha }}
           file: ./docker/Dockerfile-cloud
           push: ${{ github.event_name != 'pull_request' }}
           tags: |
```
**.github/workflows/precommit-autoupdate.yml** (7 changes)

```diff
@@ -18,10 +18,15 @@ jobs:
         with:
           python-version: '3.11'

+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
       - name: Update pre-commit hooks
         id: update
         run: |
-          pip install pre-commit
+          uv pip install --system pre-commit
           pre-commit autoupdate
           if [[ -n $(git status --porcelain) ]]; then
             echo "changes=true" >> $GITHUB_OUTPUT
```
**.github/workflows/preview-docs.yml** (9 changes)

```diff
@@ -40,10 +40,15 @@ jobs:
         with:
           python-version: '3.11'

+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
       - name: Install dependencies
         run: |
-          python3 -m pip install jupyter quartodoc
-          python3 -m pip install -e .
+          uv pip install --system jupyter quartodoc
+          uv pip install --system -e .

       - name: Build autodoc
         run: quartodoc build
```
**.github/workflows/pypi.yml** (21 changes)

```diff
@@ -38,23 +38,24 @@ jobs:
         with:
           python-version: "3.11"

+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
       - name: Install dependencies
         run: |
-          pip3 install wheel packaging==23.2
-          pip3 install --no-build-isolation -e .
-          pip3 install -r requirements-dev.txt -r requirements-tests.txt
+          uv pip install --system wheel packaging==23.2
+          uv pip install --system --no-build-isolation -e ".[dev]"

       - name: Extract tag name
         id: tag
-        run: echo ::set-output name=TAG_NAME::$(echo $GITHUB_REF | cut -d / -f 3)
+        run: echo "TAG_NAME=$(echo "$GITHUB_REF" | cut -d / -f 3)" >> "$GITHUB_OUTPUT"

-      - name: Update version in setup.py
+      - name: Build package
         run: |
-          sed -i -E 's/version="([0-9.]+)",/version="${{ steps.tag.outputs.TAG_NAME }}",/g' setup.py
-
-      - name: Build a source dist
-        run: |
-          python setup.py sdist
+          uv pip install --system build
+          python -m build

       - name: Publish package distributions to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
```
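The tag-extraction change above follows GitHub's move away from the deprecated `::set-output` workflow command: step outputs are now written as `key=value` lines appended to the file named by `$GITHUB_OUTPUT`. A minimal sketch of the pattern (the step id is illustrative):

```bash
# In a workflow step with `id: tag`, write an output by appending to $GITHUB_OUTPUT.
echo "TAG_NAME=$(echo "$GITHUB_REF" | cut -d / -f 3)" >> "$GITHUB_OUTPUT"
# Later steps can then read it back as: ${{ steps.tag.outputs.TAG_NAME }}
```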
**.github/workflows/tests-nightly.yml** (62 changes)

```diff
@@ -13,7 +13,6 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-          cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
          SKIP: no-commit-to-branch
@@ -26,7 +25,7 @@ jobs:
      max-parallel: 2
      matrix:
        python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0"]
+        pytorch_version: ["2.6.0", "2.7.0"]
    timeout-minutes: 20

    steps:
@@ -43,32 +42,30 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
-          cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"

      - name: Install PyTorch
        run: |
-          pip3 install torch==${{ matrix.pytorch_version }} torchvision
+          uv pip install --system torch==${{ matrix.pytorch_version }} torchvision

-      - name: Update requirements.txt
+      - name: Update pyproject.toml for nightly builds
        run: |
-          sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt
-          sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt
-          sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt
-          sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt
-          sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt
+          sed -i 's#"transformers==.*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml
+          sed -i 's#"peft==.*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml
+          sed -i 's#"accelerate==.*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml
+          sed -i 's#"trl==.*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml
+          sed -i 's#"datasets==.*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml

      - name: Install dependencies
        run: |
-          pip3 show torch
-          pip3 install --no-build-isolation -U -e .
+          uv pip show --system torch
+          uv pip install --system --no-build-isolation -e ".[dev]"
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
-          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Make sure PyTorch version wasn't clobbered
        run: |
@@ -84,9 +81,6 @@ jobs:
          pytest -v --durations=10 tests/patched/
          pytest -v --durations=10 tests/cli/

-      - name: cleanup pip cache
-        run: |
-          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
-
  docker-e2e-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
@@ -102,14 +96,14 @@ jobs:
          - cuda: 126
            cuda_version: 12.6.3
            python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.6.0
            num_gpus: 1
            axolotl_extras:
            nightly_build: "true"
-          - cuda: 128
-            cuda_version: 12.8.1
+          - cuda: 126
+            cuda_version: 12.6.3
            python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.7.1
            num_gpus: 1
            axolotl_extras:
            nightly_build: "true"
@@ -120,13 +114,16 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
-          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          uv pip install --system modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -136,7 +133,7 @@ jobs:
          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
      - name: Run tests job on Modal
        run: |
-          modal run cicd.e2e_tests
+          modal run -m cicd.e2e_tests
  docker-e2e-multigpu-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
    # this job needs to be run on self-hosted GPU runners...
@@ -162,13 +159,16 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
-          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          uv pip install --system modal==1.0.2 jinja2
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
```
**.github/workflows/tests.yml** (108 changes)

```diff
@@ -7,18 +7,16 @@ on:
      - "main"
    paths:
      - '**.py'
-      - 'requirements.txt'
+      - 'pyproject.toml'
      - '.github/workflows/*.yml'
-      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
      - '**.py'
-      - 'requirements.txt'
+      - 'pyproject.toml'
      - '.github/workflows/*.yml'
-      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  workflow_dispatch:
@@ -41,7 +39,6 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
-          cache: 'pip' # caching pip dependencies
      - uses: pre-commit/action@v3.0.1
        env:
          SKIP: no-commit-to-branch
@@ -55,7 +52,7 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0"]
+        pytorch_version: ["2.6.0", "2.7.1", "2.8.0"]
    timeout-minutes: 20

    steps:
@@ -72,24 +69,25 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
-          cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"

      - name: Install PyTorch
        run: |
-          pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision
+          uv pip install --system torch==${{ matrix.pytorch_version }} torchvision

      - name: Install dependencies
        run: |
-          pip3 show torch
-          pip3 install --no-cache-dir --no-build-isolation -U -e .
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
-          pip3 install -r requirements-dev.txt -r requirements-tests.txt
+          uv pip show --system torch
+          uv pip install --system wheel
+          printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
+          uv pip install --system --no-cache-dir --no-build-isolation -e ".[dev]" --constraints torch-constraints.txt
+          set -o pipefail
+          python scripts/unsloth_install.py | bash
+          python scripts/cutcrossentropy_install.py | bash

      - name: Make sure PyTorch version wasn't clobbered
        run: |
@@ -105,10 +103,10 @@ jobs:

      - name: Run tests
        run: |
-          pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
-          pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
-          pytest -v --durations=10 tests/patched/ --cov=axolotl --cov-append --cov-report=xml
-          pytest -v --durations=10 tests/cli/ --cov=axolotl --cov-append --cov-report=xml
+          python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
+          python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
+          python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/patched/
+          python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/cli/

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
@@ -118,9 +116,6 @@ jobs:
          flags: unittests,pytorch-${{ matrix.pytorch_version }}
          fail_ci_if_error: false

-      - name: cleanup pip cache
-        run: |
-          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  pytest-sdist:
    name: PyTest from Source Dist
@@ -130,7 +125,7 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0"]
+        pytorch_version: ["2.6.0", "2.7.1", "2.8.0"]
    timeout-minutes: 20

    steps:
@@ -147,25 +142,26 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
-          cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"

      - name: Install PyTorch
        run: |
-          pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision
+          uv pip install --system torch==${{ matrix.pytorch_version }} torchvision

      - name: Install dependencies
        run: |
-          pip3 show torch
-          python -m build --no-isolation --sdist
-          pip3 install --no-cache-dir --no-build-isolation dist/axolotl*.tar.gz
+          uv pip show --system torch
+          uv pip install --system wheel build setuptools_scm
+          python -m build --sdist
+          printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
+          tarball_path=$(echo dist/axolotl*.tar.gz)
+          uv pip install --no-cache-dir --no-build-isolation --system "${tarball_path}[dev]" --constraints torch-constraints.txt
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
-          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Make sure PyTorch version wasn't clobbered
        run: |
@@ -180,13 +176,9 @@ jobs:

      - name: Run tests
        run: |
-          pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
-          pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
-          pytest -v --durations=10 tests/cli/
-
-      - name: cleanup pip cache
-        run: |
-          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
+          python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
+          python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
+          python -m pytest -v --durations=10 -n 8 tests/cli/

  gate-skip-e2e:
    needs: [pre-commit, pytest, pytest-sdist]
@@ -243,7 +235,7 @@ jobs:
            pytorch: 2.7.1
            num_gpus: 1
            axolotl_extras:
-            dockerfile: "Dockerfile-uv.jinja"
+            dockerfile: "Dockerfile.jinja"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -251,13 +243,17 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          pip install modal==1.0.2 jinja2 protobuf
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -286,6 +282,12 @@ jobs:
      fail-fast: false
      matrix:
        include:
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.6.0
+            num_gpus: 1
+            axolotl_extras:
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -306,13 +308,17 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          pip install modal==1.0.2 jinja2 protobuf
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
@@ -349,13 +355,17 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==1.0.2 jinja2
+          pip install modal==1.0.2 jinja2 protobuf
      - name: Update env vars
        run: |
-          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
+          echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
```
**.gitignore** (2 changes)

```diff
@@ -191,5 +191,5 @@ out/
 # vim
 *.swp

-# scm auto-versioning
+# setuptools-scm generated version file
 src/axolotl/_version.py
```
```diff
@@ -11,13 +11,13 @@ repos:
      - id: no-commit-to-branch
        args: ['--branch', 'main']
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.14.0
+    rev: v0.12.12
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.18.2
+    rev: v1.17.1
    hooks:
      - id: mypy
        additional_dependencies:
```
```diff
@@ -1,9 +1,8 @@
 FROM axolotlai/axolotl-cloud:main-py3.11-cu124-2.6.0

 COPY .runpod/requirements.txt /requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip \
-    python3 -m pip install --upgrade pip && \
-    python3 -m pip install --upgrade -r /requirements.txt
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    /root/.local/bin/uv pip install --system -r /requirements.txt

 # Environment settings
 ARG BASE_VOLUME="/runpod-volume"
```
```diff
@@ -1,6 +1,5 @@
-include requirements.txt
+include pyproject.toml
 include README.md
 include LICENSE
-include src/setuptools_axolotl_dynamic_dependencies.py
+include src/axolotl/utils/chat_templates/templates/*.jinja
-recursive-include axolotl *.py
+recursive-include src/axolotl *.py
```
**README.md** (36 changes)

````diff
@@ -65,15 +65,9 @@ Features:
 - **Flexible Dataset Handling**: Load from local, HuggingFace, and cloud (S3, Azure, GCP, OCI) datasets.
 - **Cloud Ready**: We ship [Docker images](https://hub.docker.com/u/axolotlai) and also [PyPI packages](https://pypi.org/project/axolotl/) for use on cloud platforms and local hardware.

 ## 🚀 Quick Start - LLM Fine-tuning in Minutes

-**Requirements**:
-
-- NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
-- Python 3.11
-- PyTorch ≥2.7.1
+**Requirements**: NVIDIA GPU (Ampere+) or AMD GPU, Python 3.11+

 ### Google Colab

@@ -81,15 +75,35 @@ Features:

 ### Installation

-#### Using pip
+#### Project setup (uv add)

 ```bash
-pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja
-pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
+# Install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Initialize or enter your project
+uv init my-project && cd my-project
+uv add axolotl
+uv pip install flash-attn --no-build-isolation
+source .venv/bin/activate

 # Download example axolotl configs, deepspeed configs
 axolotl fetch examples
-axolotl fetch deepspeed_configs  # OPTIONAL
+axolotl fetch deepspeed_configs  # optional
 ```

+#### Quick try (uv pip)
+
+```bash
+# Install uv if needed
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+uv pip install axolotl
+uv pip install flash-attn --no-build-isolation
+
+# Download example axolotl configs, deepspeed configs
+axolotl fetch examples
+axolotl fetch deepspeed_configs  # optional
+```
+
 #### Using Docker
````
```diff
@@ -1,53 +0,0 @@
-FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}
-
-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
-ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
-ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
-ENV CUDA="{{ CUDA }}"
-ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
-ENV GITHUB_REF="{{ GITHUB_REF }}"
-ENV GITHUB_SHA="{{ GITHUB_SHA }}"
-ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
-ENV HF_HOME="{{ HF_HOME }}"
-
-RUN apt-get update && \
-    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
-
-WORKDIR /workspace
-
-RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git
-
-WORKDIR /workspace/axolotl
-
-RUN git fetch origin +$GITHUB_REF && \
-    git checkout FETCH_HEAD
-
-# If AXOLOTL_EXTRAS is set, append it in brackets
-RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
-        sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
-        sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
-        sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
-        sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
-        sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
-    fi
-
-RUN uv pip install packaging==23.2 setuptools==75.8.0
-RUN uv pip install torchvision
-RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
-    else \
-        uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
-    fi
-
-RUN python scripts/unsloth_install.py --uv | sh
-RUN python scripts/cutcrossentropy_install.py --uv | sh
-
-# So we can test the Docker image
-RUN uv pip install -r requirements-dev.txt -r requirements-tests.txt
-
-# fix so that git fetch/pull from remote works
-RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
-    git config --get remote.origin.fetch
-
-# helper for huggingface-login cli
-RUN git config --global credential.helper store
```
```diff
@@ -1,6 +1,10 @@
-FROM axolotlai/axolotl-base:{{ BASE_TAG }}
+FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}

-ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]
+
+ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"
+
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
 ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
 ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
 ENV CUDA="{{ CUDA }}"
@@ -9,7 +13,7 @@ ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
 ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
 ENV HF_HOME="{{ HF_HOME }}"
-ENV AXOLOTL_DATASET_NUM_PROC="8"
+ENV VENV_PYTHON=$VENV_PYTHON

 RUN apt-get update && \
     apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
@@ -25,25 +29,27 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
-        sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
-        sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
-        sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
-        sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
-        sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
+        sed -i 's#"transformers[^"]*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml; \
+        sed -i 's#"peft[^"]*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml; \
+        sed -i 's#"accelerate[^"]*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml; \
+        sed -i 's#"trl[^"]*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml; \
+        sed -i 's#"datasets[^"]*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml; \
     fi

-RUN pip install packaging==23.2 setuptools==75.8.0
+RUN uv pip install --python "$VENV_PYTHON" packaging==23.2 setuptools==75.8.0 pip
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,${AXOLOTL_EXTRAS}] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
     fi

-RUN python scripts/unsloth_install.py | sh
-RUN python scripts/cutcrossentropy_install.py | sh
+RUN uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS
+
+RUN "$VENV_PYTHON" scripts/unsloth_install.py | sh
+RUN "$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh

 # So we can test the Docker image
-RUN pip install -r requirements-dev.txt -r requirements-tests.txt
+RUN uv pip install --python "$VENV_PYTHON" -e ".[dev]"

 # fix so that git fetch/pull from remote works
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
```
**cicd/cicd.sh** (16 changes)

```diff
@@ -4,7 +4,7 @@ set -e
 python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"

 # Run unit tests with initial coverage report
-pytest -v --durations=10 -n8 \
+uv run pytest -v --durations=10 -n8 \
     --ignore=tests/e2e/ \
     --ignore=tests/patched/ \
     --ignore=tests/cli \
@@ -12,36 +12,36 @@ pytest -v --durations=10 -n8 \
     --cov=axolotl

 # Run lora kernels tests with coverage append
-pytest -v --durations=10 \
+uv run pytest -v --durations=10 \
     /workspace/axolotl/tests/e2e/patched/lora_kernels \
     --cov=axolotl \
     --cov-append

 # Run patched tests excluding lora kernels with coverage append
-pytest --full-trace -vvv --durations=10 \
+uv run pytest --full-trace -vvv --durations=10 \
     --ignore=tests/e2e/patched/lora_kernels \
     /workspace/axolotl/tests/e2e/patched \
     --cov=axolotl \
     --cov-append

 # Run solo tests with coverage append
-pytest -v --durations=10 -n1 \
+uv run pytest -v --durations=10 -n1 \
     /workspace/axolotl/tests/e2e/solo/ \
     --cov=axolotl \
     --cov-append

 # Run integration tests with coverage append
-pytest -v --durations=10 \
+uv run pytest -v --durations=10 \
     /workspace/axolotl/tests/e2e/integrations/ \
     --cov=axolotl \
     --cov-append

-pytest -v --durations=10 /workspace/axolotl/tests/cli \
+uv run pytest -v --durations=10 /workspace/axolotl/tests/cli \
     --cov=axolotl \
     --cov-append

 # Run remaining e2e tests with coverage append and final report
-pytest -v --durations=10 \
+uv run pytest -v --durations=10 \
     --ignore=tests/e2e/solo/ \
     --ignore=tests/e2e/patched/ \
     --ignore=tests/e2e/multigpu/ \
@@ -52,4 +52,4 @@ pytest -v --durations=10 \
     --cov-append \
     --cov-report=xml:e2e-coverage.xml

-codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
+uv run codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
```
```diff
@@ -23,7 +23,7 @@ df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
     "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
     "CUDA": os.environ.get("CUDA", "126"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
```

```diff
@@ -23,7 +23,7 @@ df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
     "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
     "CUDA": os.environ.get("CUDA", "126"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
```
```diff
@@ -65,13 +65,8 @@ def run_cmd(cmd: str, run_folder: str):
     import subprocess  # nosec

     sp_env = os.environ.copy()
-    sp_env["AXOLOTL_DATASET_NUM_PROC"] = "8"
+    sp_env["AXOLOTL_DATASET_PROCESSES"] = "8"

     # Propagate errors from subprocess.
-    try:
-        exit_code = subprocess.call(cmd.split(), cwd=run_folder, env=sp_env)  # nosec
-        if exit_code:
-            print(f"Command '{cmd}' failed with exit code {exit_code}")
-            return exit_code
-    except Exception as e:  # pylint: disable=broad-except
-        print(f"Command '{cmd}' failed with exception {e}")
+    if exit_code := subprocess.call(cmd.split(), cwd=run_folder, env=sp_env):  # nosec
+        exit(exit_code)
```
```diff
@@ -13,7 +13,7 @@ datasets:
 val_set_size: 0
 output_dir: temp_debug/axolotl_outputs/model
 dataset_prepared_path: temp_debug/axolotl_outputs/data
-dataset_num_proc: 1
+dataset_processes: 1

 sequence_len: 4096
 sample_packing: false
```
```diff
@@ -1,13 +1,19 @@
-ARG BASE_TAG=main-base
-FROM axolotlai/axolotl-base:$BASE_TAG
+ARG BASE_TAG=main-base-uv
+FROM axolotlai/axolotl-base-uv:$BASE_TAG

 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
 ARG PYTORCH_VERSION="2.1.2"
 ARG GIT_REF="refs/heads/main"
 ARG GIT_SHA="HEAD"
+ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"

 ENV PYTORCH_VERSION=$PYTORCH_VERSION
 ENV GIT_REF=$GIT_REF
 ENV GIT_SHA=$GIT_SHA
+ENV VENV_PYTHON=$VENV_PYTHON

 RUN apt-get update && \
     apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev rsync s3fs && \
@@ -20,16 +26,19 @@ RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git

 WORKDIR /workspace/axolotl

+# Ensure we are on the expected commit and break Docker cache between revisions
+RUN git fetch origin "$GIT_REF" && git checkout "$GIT_SHA"
+
 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
+        uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
     fi && \
-    python scripts/unsloth_install.py | sh && \
-    python scripts/cutcrossentropy_install.py | sh && \
-    pip install pytest && \
-    pip cache purge
+    uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS && \
+    "$VENV_PYTHON" scripts/unsloth_install.py | sh && \
+    "$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh && \
+    uv pip install --python "$VENV_PYTHON" pytest

 # fix so that git fetch/pull from remote works with shallow clone
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
```
```diff
@@ -47,8 +47,6 @@ RUN git lfs install --skip-repo && \
     pip3 install -U --no-cache-dir pydantic==1.10.10 && \
     pip3 cache purge

-RUN if [ "$PYTORCH_VERSION" = "2.9.0" ] && [ "$CUDA" = "128" ] ; then \
-        wget https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.17/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-        pip3 install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-        rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
+RUN if [ "$PYTORCH_VERSION" = "2.6.0" ] && [ "$CUDA" = "124" ] ; then \
+        FLASH_ATTENTION_FORCE_BUILD="TRUE" uv pip install --no-build-isolation flash-attn==2.8.0.post2; \
     fi
```
```diff
@@ -12,8 +12,8 @@ EXPOSE 22
 COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
 COPY scripts/motd /etc/motd

-RUN pip install jupyterlab notebook ipywidgets && \
-    jupyter lab clean
+RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
+    "$VENV_PYTHON" -m jupyter lab clean
 RUN apt update && \
     apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
     rm -rf /var/cache/apt/archives && \
```
```diff
@@ -12,8 +12,8 @@ EXPOSE 22
 COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
 COPY scripts/motd /etc/motd

-RUN pip install jupyterlab notebook ipywidgets && \
-    jupyter lab clean
+RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
+    "$VENV_PYTHON" -m jupyter lab clean
 RUN apt update && \
     apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm && \
     rm -rf /var/cache/apt/archives && \
```
```diff
@@ -24,13 +24,14 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
-    fi
+        uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm] $AXOLOTL_ARGS; \
+    fi && \
+    uv pip install --no-build-isolation flash-attn $AXOLOTL_ARGS

 # So we can test the Docker image
-RUN pip install pytest
+RUN uv pip install pytest

 # fix so that git fetch/pull from remote works
 RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
```
```diff
@@ -13,6 +13,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
 ENV PYTHON_VERSION=$PYTHON_VERSION
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
 ENV UV_TORCH_BACKEND="cu${CUDA}"
+ENV VENV_PYTHON=/workspace/axolotl-venv/bin/python

 RUN apt-get update \
     && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \
@@ -29,14 +30,8 @@ RUN uv venv --no-project --relocatable axolotl-venv

 ENV PATH="/workspace/axolotl-venv/bin:${PATH}"

-RUN uv pip install packaging setuptools wheel psutil \
-    && uv pip install torch==${PYTORCH_VERSION} torchvision \
-    && uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
-    && uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
-    && uv pip install awscli pydantic
-
-RUN if [ "$PYTORCH_VERSION" = "2.9.0" ] && [ "$CUDA" = "128" ] ; then \
-        wget https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.17/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-        uv pip install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-        rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
-    fi
+RUN uv pip install --python "$VENV_PYTHON" packaging setuptools wheel psutil protobuf grpclib \
+    && uv pip install --python "$VENV_PYTHON" torch==${PYTORCH_VERSION} \
+    && uv pip install --python "$VENV_PYTHON" --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
+    && uv pip install --python "$VENV_PYTHON" "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
+    && uv pip install --python "$VENV_PYTHON" awscli pydantic
```
````diff
@@ -29,7 +29,7 @@ While debugging it's helpful to simplify your test scenario as much as possible.
 1. **Make sure you are using the latest version of axolotl**: This project changes often and bugs get fixed fast. Check your git branch and make sure you have pulled the latest changes from `main`.
 1. **Eliminate concurrency**: Restrict the number of processes to 1 for both training and data preprocessing:
     - Set `CUDA_VISIBLE_DEVICES` to a single GPU, ex: `export CUDA_VISIBLE_DEVICES=0`.
-    - Set `dataset_num_proc: 1` in your axolotl config or run the training command with `--dataset_num_proc=1`.
+    - Set `dataset_processes: 1` in your axolotl config or run the training command with `--dataset_processes=1`.
 2. **Use a small dataset**: Construct or use a small dataset from HF Hub. When using a small dataset, you will often have to make sure `sample_packing: False` and `eval_sample_packing: False` to avoid errors. If you are in a pinch and don't have time to construct a small dataset but want to use from the HF Hub, you can shard the data (this will still tokenize the entire dataset, but will only use a fraction of the data for training. For example, to shard the dataset into 20 pieces, add the following to your axolotl config):

 ```yaml
````
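For reference, a minimal sketch of the sharding stanza the tip above refers to, assuming axolotl's `shards` dataset option (the dataset path and type here are illustrative):

```yaml
datasets:
  - path: some-org/some-dataset  # hypothetical HF Hub dataset
    type: alpaca
    shards: 20  # tokenize everything, but train on 1/20th of the data
```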
````diff
@@ -72,8 +72,8 @@ datasets:
 Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project:

 ```bash
-pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+uv sync --extra deepspeed
+uv pip install flash-attn --no-build-isolation
 ```

 #### Remote Hosts
````
```diff
@@ -101,7 +101,7 @@ For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 acceler
             "-m", "axolotl.cli.train", "dev_chat_template.yml",
             // The flags below simplify debugging by overriding the axolotl config
             // with the debugging tips above. Modify as needed.
-            "--dataset_num_proc=1", // limits data preprocessing to one process
+            "--dataset_processes=1", // limits data preprocessing to one process
             "--max_steps=1", // limits training to just one step
-            "--batch_size=1", // minimizes batch size
+            "--micro_batch_size=1", // minimizes batch size
```
````diff
@@ -213,8 +213,8 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
 You will now be in the container. Next, perform an editable install of Axolotl:

 ```bash
-pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+uv sync --extra deepspeed
+uv pip install flash-attn --no-build-isolation
 ```

 ### Attach To Container
````
```diff
@@ -63,14 +63,6 @@ description: Frequently asked questions

 > A: There seems to be a wheel issue with FA2 2.8.0 on CUDA 12.4. Try CUDA 12.6 instead or downgrade to FA2 2.7.4. Please refer to the upstream issue: https://github.com/Dao-AILab/flash-attention/issues/1717.

-**Q: Can we mix text and text+image datasets for VLM training?**
-
-> A: Yes, you can for newer VLM arch. The ones that would not work are LLaVA / Pixtral arch. If you notice one not working, please let us know!
-
-**Q: Why is `memory/max_*` different from `nvidia-smi`?**
-
-> A: We use `torch` APIs to retrieve this information. You can see https://docs.pytorch.org/docs/stable/notes/cuda.html#cuda-memory-management for more information.
-
 ### Chat templates

 **Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
```
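The `memory/max_*` answer refers to PyTorch's CUDA memory statistics, which track the caching allocator for the current process rather than the whole-device usage `nvidia-smi` reports. A minimal sketch of the kind of `torch` calls involved (illustrative; the exact metrics axolotl logs may differ):

```python
import torch

# Peak memory actually allocated to tensors by this process, in bytes.
peak_alloc = torch.cuda.max_memory_allocated()
# Peak memory reserved by PyTorch's caching allocator; closer to, but still
# below, nvidia-smi's number, which also includes CUDA context overhead.
peak_reserved = torch.cuda.max_memory_reserved()
print(f"allocated: {peak_alloc / 2**30:.2f} GiB, reserved: {peak_reserved / 2**30:.2f} GiB")
```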
@@ -29,19 +29,40 @@ Follow the instructions at: [https://pytorch.org/get-started/locally/](https://p
|
||||
For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
|
||||
:::
|
||||
|
||||
### PyPI Installation (Recommended) {#sec-pypi}
|
||||
### uv Installation (Recommended) {#sec-uv-quick}
|
||||
|
||||
```{.bash}
|
||||
pip3 install -U packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
||||
# Install uv if not already installed
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# Add Axolotl to a project (recommended)
|
||||
uv init my-project && cd my-project
|
||||
uv add axolotl
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
For a quick one-off install without creating a project:
|
||||
|
||||
```{.bash}
|
||||
uv pip install axolotl
|
||||
uv pip install flash-attn --no-build-isolation
|
||||
```
|
||||
|
||||
### pip Installation {#sec-pypi}
|
||||
|
||||
```{.bash}
|
||||
pip install --no-build-isolation axolotl[deepspeed]
|
||||
pip install --no-build-isolation flash-attn
|
||||
```
|
||||
|
||||
We use `--no-build-isolation` in order to detect the installed PyTorch version (if
|
||||
installed) in order not to clobber it, and so that we set the correct version of
|
||||
dependencies that are specific to the PyTorch version or other installed
|
||||
co-dependencies.
|
||||
co-dependencies. Flash Attention is resolved separately so it can be built against
|
||||
the environment configured by the previous step.

### uv Installation {#sec-uv}
### Advanced uv Installation {#sec-uv}

uv is a fast, reliable Python package installer and resolver built in Rust. It offers significant performance improvements over pip and provides better dependency resolution, making it an excellent choice for complex environments.

@@ -62,28 +83,38 @@ source .venv/bin/activate
Install PyTorch
- PyTorch 2.6.0 recommended
```{.bash}
uv pip install packaging setuptools wheel
uv pip install torch==2.6.0
uv pip install awscli pydantic
```

Install Axolotl from PyPI
```{.bash}
uv pip install --no-build-isolation axolotl[deepspeed,flash-attn]

uv pip install --no-build-isolation axolotl[deepspeed]
# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO
uv pip install --no-build-isolation axolotl[deepspeed,flash-attn,vllm]
# uv pip install --no-build-isolation axolotl[deepspeed,vllm]

uv pip install flash-attn --no-build-isolation
```

### Edge/Development Build {#sec-edge-build}

For the latest features between releases:

#### Using uv (recommended)
```{.bash}
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl
pip3 install -U packaging setuptools wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
curl -LsSf https://astral.sh/uv/install.sh | sh # If not already installed
uv sync
uv pip install flash-attn --no-build-isolation
```

#### Using pip
```{.bash}
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl
pip install --no-build-isolation -e '.[deepspeed]'
pip install --no-build-isolation flash-attn
```

### Docker {#sec-docker}
@@ -141,7 +172,7 @@ For providers supporting Docker:
### macOS {#sec-macos}

```{.bash}
pip3 install --no-build-isolation -e '.'
uv pip install --no-build-isolation -e '.'
```

See @sec-troubleshooting for Mac-specific issues.
@@ -159,10 +190,15 @@ We recommend using WSL2 (Windows Subsystem for Linux) or Docker.
1. Install Python ≥3.11
2. Install PyTorch: https://pytorch.org/get-started/locally/
3. Install Axolotl:
```{.bash}
pip3 install -U packaging setuptools wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
```
```{.bash}
# Option A: add Axolotl to the environment
uv add axolotl
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install axolotl
uv pip install flash-attn --no-build-isolation
```
4. (Optional) Login to Hugging Face:
```{.bash}
huggingface-cli login

@@ -27,9 +27,3 @@ learning_rate: 2e-5
In this example, we have a default learning rate of 2e-5 across the entire model, but a separate learning rate
of 1e-6 for all the self attention `o_proj` modules across all layers, and a learning rate of 1e-5 for the 3rd layer's
self attention `q_proj` module.

::: {.callout-note}

We currently only support varying `lr`. If you're interested in adding support for others (e.g. `weight_decay`), we welcome PRs. See https://github.com/axolotl-ai-cloud/axolotl/blob/613bcf90e58f3ab81d3827e7fc572319908db9fb/src/axolotl/core/trainers/mixins/optimizer.py#L17

:::
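
Under the hood, per-module learning rates map onto PyTorch optimizer parameter groups. A minimal sketch of the idea (not Axolotl's actual mixin, which is linked above):

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Transformer(d_model=64, nhead=4)

# Route the self-attention output projections to their own group,
# everything else to the default group.
special, default = [], []
for name, param in model.named_parameters():
    (special if "self_attn.out_proj" in name else default).append(param)

optimizer = optim.AdamW([
    {"params": default, "lr": 2e-5},  # default learning rate
    {"params": special, "lr": 1e-6},  # override for matched modules
])
```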

@@ -88,7 +88,6 @@ fsdp_sync_module_states | **REMOVED**
fsdp_cpu_ram_efficient_loading | cpu_ram_efficient_loading
fsdp_state_dict_type | state_dict_type
fsdp_use_orig_params | **REMOVED**
fsdp_activation_checkpointing | activation_checkpointing

For more details, please see the migration guide in the [torchtitan repo](https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md). In Axolotl,
if you were using the following FSDP1 config:

@@ -56,14 +56,10 @@ image_resize_algorithm: bilinear

Please see the [examples](https://github.com/axolotl-ai/axolotl/tree/main/examples) folder for full configs.

::: {.callout-tip}
::: {.callout-warning}
Some of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.
:::

::: {.callout-note}
As of now, we do not truncate nor drop samples based on `sequence_len`, as each architecture has a different way of processing non-text tokens. We are looking for help on this.
:::

### Mllama {#sec-mllama}

```yaml
@@ -99,7 +95,7 @@ chat_template: llava
### Mistral-Small-3.1 {#sec-mistral-small-31}

::: {.callout-tip}
Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
:::

```yaml
@@ -109,7 +105,7 @@ base_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503
### Magistral-Small-2509 {#sec-magistral-small-2509}

::: {.callout-tip}
Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
:::

```yaml
@@ -119,7 +115,7 @@ base_model: mistralai/Magistral-Small-2509
### Voxtral {#sec-voxtral}

::: {.callout-tip}
Please make sure to install audio lib via `pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
Please make sure to install audio lib via `uv pip install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
:::

```yaml
@@ -147,7 +143,7 @@ The model's initial loss and grad norm will be very high. We suspect this to be
:::

::: {.callout-tip}
Please make sure to install `timm` via `pip3 install timm==1.0.17`
Please make sure to install `timm` via `uv pip install timm==1.0.17`
:::

```yaml
@@ -172,18 +168,10 @@ base_model: Qwen/Qwen2.5-VL-7B-Instruct
chat_template: qwen2_vl # same as qwen2-vl
```

### Qwen3-VL {#sec-qwen3-vl}

```yaml
base_model: Qwen/Qwen3-VL-4B-Instruct

chat_template: qwen2_vl # same as qwen2-vl
```

### SmolVLM2 {#sec-smolvlm2}

::: {.callout-tip}
Please make sure to install `num2words` via `pip3 install num2words==0.5.14`
Please make sure to install `num2words` via `uv pip install num2words==0.5.14`
:::

```yaml
@@ -193,7 +181,7 @@ base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
### LFM2-VL {#sec-lfm2-vl}

::: {.callout-warning}
Please uninstall `causal-conv1d` via `pip3 uninstall -y causal-conv1d`
Please uninstall `causal-conv1d` via `uv pip uninstall -y causal-conv1d`
:::

```yaml
@@ -234,7 +222,7 @@ For audio loading, you can use the following keys within `content` alongside `"t

::: {.callout-tip}

You may need to install `librosa` via `pip3 install librosa==0.11.0`.
You may need to install `librosa` via `uv pip install librosa==0.11.0`.

:::
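
For illustration, a hypothetical sample row with an audio part (the `path` key here mirrors the image convention in this doc; the exact key set depends on the processor):

```python
# Assumed structure, following the image examples elsewhere in this doc.
sample = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Transcribe this clip."},
                {"type": "audio", "path": "clips/example.wav"},
            ],
        }
    ]
}
```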


@@ -219,21 +219,6 @@ DPO supports the following types with the following dataset format:
}
```

#### chat_template.argilla_chat

```json
{
    "chosen": [
        {"role": "user", "content": "..."},
        {"role": "assistant", "content": "..."}
    ],
    "rejected": [
        {"role": "user", "content": "..."},
        {"role": "assistant", "content": "..."}
    ]
}
```

#### chat_template.default

```yaml

@@ -49,9 +49,9 @@ When sequence parallelism is enabled:
To use sequence parallelism, you need:

- Multiple GPUs (at least 2)
- The `ring-flash-attn` package. Install with:
  - `pip install axolotl[ring-flash-attn]` (preferred)
  - `pip install ring-flash-attn>=0.1.4`
- The `ring-flash-attn` package. Install with either `uv sync --extra ring-flash-attn`
  (from a cloned repository) or `uv pip install ring-flash-attn>=0.1.4`.
- Flash Attention installed separately with `uv pip install flash-attn --no-build-isolation`.
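
Conceptually, each GPU in the sequence-parallel group receives a contiguous slice of the packed sequence, and `ring-flash-attn` exchanges key/value blocks between ranks so attention still covers the full context. A toy sketch of the slicing (illustrative only, not Axolotl's implementation):

```python
def shard_sequence(input_ids: list[int], rank: int, world_size: int) -> list[int]:
    """Give each rank a contiguous slice of one long packed sequence."""
    chunk = len(input_ids) // world_size
    return input_ids[rank * chunk : (rank + 1) * chunk]

tokens = list(range(32768))           # one 32k-token packed sequence
local = shard_sequence(tokens, rank=1, world_size=4)
assert len(local) == 8192             # each of 4 GPUs sees an 8k slice
```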

## Limitations


@@ -6,17 +6,20 @@ LFM2 features a new hybrid Liquid architecture with multiplicative gates, short-

This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.

Thanks to the team at LiquidAI for giving us early access to prepare for these releases.

## Getting Started

1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).

Here is an example of how to install from PyPI:
```bash
# Ensure you have a compatible version of Pytorch installed
pip3 install packaging setuptools wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Ensure you have a compatible version of PyTorch installed
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Run one of the finetuning examples below.
@@ -33,19 +36,11 @@ Thanks to the team at LiquidAI for giving us early access to prepare for these r
axolotl train examples/LiquidAI/lfm2-vl-lora.yaml
```

**LFM2-MoE**
```bash
pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6

# LoRA SFT (1x48GB @ 16.2GiB)
axolotl train examples/LiquidAI/lfm2-8b-a1b-lora.yaml
```

### TIPS

- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
```bash
pip uninstall -y causal-conv1d
uv pip uninstall -y causal-conv1d
```

- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
@@ -55,13 +50,14 @@ Thanks to the team at LiquidAI for giving us early access to prepare for these r

## Optimization Guides

- [Optimizations Guide](https://docs.axolotl.ai/docs/optimizations.html)
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)

## Related Resources

- [LFM2 Blog](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models)
- [LFM2-VL Blog](https://www.liquid.ai/blog/lfm2-vl-efficient-vision-language-models)
- [LFM2-MoE Blog](https://www.liquid.ai/blog/lfm2-8b-a1b-an-efficient-on-device-mixture-of-experts)
- [Axolotl Docs](https://docs.axolotl.ai)
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)

@@ -1,7 +1,6 @@
base_model: LiquidAI/LFM2-350M

plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
chunked_cross_entropy: true

eot_tokens:
  - "<|im_end|>"

@@ -1,59 +0,0 @@
base_model: LiquidAI/LFM2-8B-A1B

plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin

load_in_8bit: true

eot_tokens:
  - "<|im_end|>"
datasets:
  - path: mlabonne/FineTome-100k
    type: chat_template
    split: train[:20%]
    field_messages: conversations
    message_field_role: from
    message_field_content: value
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out

sequence_len: 4096
sample_packing: true

adapter: lora
lora_model_dir:

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 4
num_epochs: 1
optimizer: adamw_torch_fused
lr_scheduler: cosine
learning_rate: 5e-5

bf16: true
tf32: true

gradient_checkpointing: true
resume_from_checkpoint:
logging_steps: 1
flash_attention: true

warmup_ratio: 0.1
evals_per_epoch: 2
saves_per_epoch: 1

weight_decay: 0.0

# save_first_step: true # uncomment this to validate checkpoint saving works with your config
@@ -3,9 +3,6 @@ trust_remote_code: true
model_type: AutoModelForImageTextToText
processor_type: AutoProcessor

plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin

# these 3 lines are needed for now to handle vision chat templates w images
skip_prepare_dataset: true
remove_unused_columns: false

@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'
uv sync
uv pip install flash-attn --no-build-isolation

# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
python scripts/cutcrossentropy_install.py | sh
@@ -31,7 +31,7 @@ python scripts/cutcrossentropy_install.py | sh
# For those using our Docker image, use the below path.
export CUDA_HOME=/usr/local/cuda

pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
```

For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
@@ -67,7 +67,7 @@ If those didn't help, please try the below solutions:
1. Pass the env var for CMake and try installing again:

```bash
Python_EXECUTABLE=$(which python) pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
Python_EXECUTABLE=$(which python) uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
```

2. Git clone the repo and manually hardcode python path:
@@ -92,7 +92,7 @@ If those didn't help, please try the below solutions:
```

```bash
pip3 install . --no-build-isolation --no-deps
uv pip install . --no-build-isolation --no-deps
```

## Optimization Guides

@@ -17,8 +17,8 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'
uv sync
uv pip install flash-attn --no-build-isolation

# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
python scripts/cutcrossentropy_install.py | sh

@@ -12,10 +12,10 @@
"\n",
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster, saving you both time and money.\n",
"\n",
"- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
"- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
"- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
"- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
"- \u2b50 us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
"- \ud83d\udcdc Read the [Docs](http://docs.axolotl.ai/)\n",
"- \ud83d\udcac Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
"- \ud83d\udcf0 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
]
},
{
@@ -39,8 +39,8 @@
"source": [
"%%capture\n",
"# This step can take ~5-10 minutes to install dependencies\n",
"!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec\""
"!uv pip install --no-build-isolation axolotl>=0.9.1\n!uv pip install flash-attn --no-build-isolation\n",
"!uv pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
]
},
[The remaining hunks of this notebook diff touch only Jupyter widget metadata: progress-bar layout objects and download/processing status strings re-encoded with Unicode escapes such as \u2007 (figure space) and \u200b (zero-width space). They contain no substantive changes and are omitted here.]

@@ -16,8 +16,13 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r

```bash
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage

@@ -10,17 +10,22 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt

```bash
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. In addition to Axolotl's requirements, Gemma-3n requires:

```bash
pip3 install timm==1.0.17
uv pip install timm==1.0.17

# for loading audio data
pip3 install librosa==0.11.0
uv pip install librosa==0.11.0
```

3. Download sample dataset files

@@ -12,8 +12,13 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations

```bash
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
@@ -75,7 +80,7 @@ for more information about using a special vllm-openai docker image for inferenc
Optionally, vLLM can be installed from nightly:

```bash
pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
uv pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
```
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
```bash
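# (Assumed invocation; the actual command is truncated in this diff.)
# vllm serve <model-id> --tensor-parallel-size 8
```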

@@ -13,8 +13,8 @@ Tencent released a family of opensource models called HunYuan with varying param
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'
uv sync
uv pip install flash-attn --no-build-isolation

# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
python scripts/cutcrossentropy_install.py | sh

@@ -29,7 +29,7 @@ flex_attention: true
flex_attn_compile_kwargs:
  dynamic: false
  mode: max-autotune-no-cudagraphs
save_strategy: no

torch_compile: true

wandb_project:

@@ -1,50 +0,0 @@
base_model: NousResearch/Llama-3.2-1B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

load_in_4bit: true

datasets:
  - path: mhenrichsen/alpaca_2k_test
    type: alpaca

output_dir: ./outputs/opentelemetry-example

adapter: qlora
sequence_len: 512
sample_packing: false

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true

# OpenTelemetry Configuration
use_otel_metrics: true
otel_metrics_host: "localhost"
otel_metrics_port: 8000

# Disable WandB
use_wandb: false

gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: auto
tf32: false

gradient_checkpointing: true
logging_steps: 1
flash_attention: false

warmup_ratio: 0.1
evals_per_epoch: 2
saves_per_epoch: 1
weight_decay: 0.0

special_tokens:
  pad_token: "<|end_of_text|>"
@@ -13,9 +13,14 @@ Thanks to the team at MistralAI for giving us early access to prepare for these
Here is an example of how to install from PyPI:

```bash
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage

@@ -12,7 +12,7 @@ Before starting, ensure you have:
Run the thinking model fine-tuning:

```bash
axolotl train examples/magistral/think/magistral-small-think-qlora.yaml
axolotl train magistral-small-think-qlora.yaml
```

This config uses about 19.1 GiB VRAM.

@@ -21,7 +21,7 @@ Before starting, ensure you have:

3. Run the fine-tuning:
```bash
axolotl train examples/magistral/vision/magistral-small-vision-24B-qlora.yml
axolotl train magistral-small-vision-24B-qlora.yml
```

This config uses about 17 GiB VRAM.

@@ -1,51 +0,0 @@
# Mistral Small 3.1/3.2 Fine-tuning

This guide covers fine-tuning [Mistral Small 3.1](mistralai/Mistral-Small-3.1-24B-Instruct-2503) and [Mistral Small 3.2](mistralai/Mistral-Small-3.2-24B-Instruct-2506) with vision capabilities using Axolotl.

## Prerequisites

Before starting, ensure you have:
- Installed Axolotl (see [Installation docs](https://docs.axolotl.ai/docs/installation.html))

## Getting Started

1. Install the required vision lib:
```bash
pip install 'mistral-common[opencv]==1.8.5'
```

2. Download the example dataset image:
```bash
wget https://huggingface.co/datasets/Nanobit/text-vision-2k-test/resolve/main/African_elephant.jpg
```

3. Run the fine-tuning:
```bash
axolotl train examples/mistral/mistral-small/mistral-small-3.1-24B-lora.yml
```

This config uses about 29.4 GiB VRAM.

## Dataset Format

The vision model requires the multi-modal dataset format documented [here](https://docs.axolotl.ai/docs/multimodal.html#dataset-format).

One exception: passing `"image": PIL.Image` is not supported. MistralTokenizer only supports `path`, `url`, and `base64` for now.

Example:
```json
{
  "messages": [
    {"role": "system", "content": [{ "type": "text", "text": "{SYSTEM_PROMPT}"}]},
    {"role": "user", "content": [
      { "type": "text", "text": "What's in this image?"},
      {"type": "image", "path": "path/to/image.jpg" }
    ]},
    {"role": "assistant", "content": [{ "type": "text", "text": "..." }]}
  ]
}
```

## Limitations

- Sample Packing is not supported for multi-modality training currently.
@@ -39,7 +39,7 @@ wandb_name:
wandb_log_model:

gradient_accumulation_steps: 1
micro_batch_size: 2
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine

@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'
uv sync
uv pip install flash-attn --no-build-isolation

# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
python scripts/cutcrossentropy_install.py | sh
@@ -24,12 +24,12 @@ python scripts/cutcrossentropy_install.py | sh

2. Install the Qwen3-Next transformers commit
```bash
pip3 uninstall -y transformers && pip3 install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
uv pip uninstall -y transformers && uv pip install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
```

3. Install FLA for improved performance
```bash
pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.3.2
uv pip uninstall -y causal-conv1d && uv pip install flash-linear-attention==0.3.2
```

4. Run the finetuning example:

@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'
uv sync --extra deepspeed
uv pip install flash-attn --no-build-isolation

# Install Cut Cross Entropy
python scripts/cutcrossentropy_install.py | sh

@@ -13,14 +13,19 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
Here is an example of how to install from PyPI:
```bash
# Ensure you have a compatible version of Pytorch installed
pip3 install packaging setuptools wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Install an extra dependency:

```bash
pip3 install num2words==0.5.14
uv pip install num2words==0.5.14
```

3. Run the finetuning example:

@@ -12,16 +12,21 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r

```bash
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
# Option A: manage dependencies in your project
uv add 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation

# Option B: quick install
uv pip install 'axolotl>=0.12.0'
uv pip install flash-attn --no-build-isolation
```

2. Install the dependencies below.

```bash
# audio
pip3 install librosa==0.11.0
pip3 install 'mistral_common[audio]==1.8.3'
uv pip install librosa==0.11.0
uv pip install 'mistral_common[audio]==1.8.3'

# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
python scripts/cutcrossentropy_install.py | sh

197 pyproject.toml
@@ -1,14 +1,131 @@
[build-system]
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"]
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

[project]
name = "axolotl"
dynamic = ["version", "dependencies", "optional-dependencies"]
dynamic = ["version"]
description = "LLM Trainer"
readme = "README.md"
requires-python = ">=3.10"
# license = "Apache-2.0"
requires-python = ">=3.10,<3.13"
license = {text = "Apache-2.0"}
authors = [
    {name = "Axolotl AI"},
]
maintainers = [
    {name = "Axolotl AI"},
]
classifiers = [
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

dependencies = [
    "torch>=2.6.0",
    "packaging>=23.2",
    "huggingface_hub>=0.33.0",
    "peft==0.17.0",
    "transformers==4.56.1",
    "tokenizers>=0.21.1",
    "accelerate==1.10.1",
    "datasets==4.0.0",
    "trl==0.23.0",
    "hf_xet==1.1.5",
    "kernels==0.9.0",
    "trackio",
    "optimum==1.16.2",
    "hf_transfer",
    "sentencepiece",
    "gradio==5.41.1",
    "modal==1.0.2",
    "pydantic>=2.10.6",
    "addict",
    "fire",
    "PyYAML>=6.0",
    "requests",
    "wandb",
    "einops",
    "colorama",
    "numba",
    "numpy>=1.24.4,<3.0",
    "evaluate==0.4.1",
    "scipy",
    "scikit-learn>=1.7.0",
    "nvidia-ml-py==12.560.30",
    "art",
    "tensorboard",
    "python-dotenv==1.0.1",
    "s3fs>=2024.5.0",
    "gcsfs>=2024.5.0",
    "adlfs>=2024.5.0",
    "ocifs==1.3.2",
    "zstandard>=0.23.0",
    "fastcore",
    "lm_eval==0.4.7",
    "langdetect==1.0.9",
    "immutabledict==4.2.0",
    "antlr4-python3-runtime==4.13.2",
    "schedulefree==1.4.1",
    "mistral-common==1.8.5",

    # Axolotl contribs
    "axolotl-contribs-lgpl @ git+https://github.com/axolotl-ai-cloud/axolotl-contribs-lgpl.git@numpy",
    "axolotl-contribs-mit==0.0.5",

    # Platform-specific dependencies (Linux by default, excluded on macOS)
    "triton>=3.0.0 ; sys_platform != 'darwin'",
    "xformers>=0.0.28 ; sys_platform != 'darwin'",
    "autoawq==0.2.7.post3 ; sys_platform != 'darwin'",
    "liger-kernel==0.6.1 ; sys_platform != 'darwin'",
    "torchao==0.13.0 ; sys_platform != 'darwin'",
    "bitsandbytes==0.47.0 ; sys_platform != 'darwin'",
    "deepspeed>=0.17.5 ; sys_platform != 'darwin'",
    "deepspeed-kernels ; sys_platform != 'darwin'",
]

[project.optional-dependencies]
ring-flash-attn = [
    "ring-flash-attn>=0.1.7",
    "yunchang==0.6.0",
]
mamba-ssm = ["mamba-ssm>=2.2.0", "causal_conv1d>=1.4.0",]
gptqmodel = ["gptqmodel>=4.0.0"]
mlflow = ["mlflow"]
galore = ["galore_torch"]
apollo = ["apollo-torch"]
optimizers = [
    "galore_torch",
    "apollo-torch",
    "lomo-optim==0.1.1",
    "torch-optimi==0.2.1",
    "came_pytorch==0.1.3",
]
ray = ["ray[train]"]
vllm = ["vllm>=0.10.0"]
llmcompressor = ["llmcompressor>=0.5.1"]
fbgemm-gpu = ["fbgemm-gpu-genai>=1.2.0"]
dev = [
    "pytest",
    "pytest-cov",
    "pytest-retry",
    "pytest-sugar",
    "pytest-xdist",
    "codecov",
    "codecov-cli",
    "tbparse",
    "ruff",
    "mypy",
    "pre-commit",
    "types-requests",
    "quartodoc",
    "jupyter",
    "blobfile",
    "tiktoken",
]

[project.scripts]
axolotl = "axolotl.cli.main:main"
@@ -17,15 +134,20 @@ axolotl = "axolotl.cli.main:main"
Homepage = "https://axolotl.ai/"
Documentation = "https://docs.axolotl.ai/"
Repository = "https://github.com/axolotl-ai-cloud/axolotl.git"

[tool.setuptools_scm]
Issues = "https://github.com/axolotl-ai-cloud/axolotl/issues"

[tool.setuptools]
py-modules = ["setuptools_axolotl_dynamic_dependencies"]
package-dir = {"" = "src"}
include-package-data = true

[tool.setuptools.cmdclass]
build_py = "setuptools_axolotl_dynamic_dependencies.BuildPyCommand"
[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
"*" = ["*.yaml", "*.yml", "*.json"]

[tool.setuptools_scm]
write_to = "src/axolotl/_version.py"

[tool.ruff]
line-length = 88

@@ -57,3 +179,60 @@ indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
docstring-code-format = false

[tool.mypy]
python_version = "3.11"
warn_return_any = true
warn_unused_configs = true
ignore_missing_imports = true

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
addopts = "-v --tb=short"

# UV specific configuration
[tool.uv]
prerelease = "allow"
default-groups = ["default"]
conflicts = [
    [
        { group = "default" },
        { extra = "vllm" },
    ],
]

[dependency-groups]
default = ["torch>=2.6.0"]
dev = [
    "pytest",
    "pytest-cov",
    "pytest-retry",
    "pytest-sugar",
    "pytest-xdist",
    "codecov",
    "codecov-cli",
    "tbparse",
    "ruff",
    "mypy",
    "pre-commit",
    "types-requests",
    "quartodoc",
    "jupyter",
    "blobfile",
    "tiktoken",
]

[[tool.uv.index]]
name = "autogptq"
url = "https://huggingface.github.io/autogptq-index/whl/"

[tool.uv.extra-build-dependencies]
mamba-ssm = ["torch", "causal_conv1d"]
gptqmodel = [
    { requirement = "torch", match-runtime = true },
]
autoawq = ["torch"]
triton = ["torch"]
bitsandbytes = ["torch"]
grpclib = ["wheel"]

@@ -1,8 +0,0 @@
black
mypy
pre-commit
types-requests
quartodoc
jupyter
blobfile
tiktoken
@@ -1,8 +0,0 @@
codecov
codecov-cli
pytest
pytest-cov
pytest-retry
pytest-sugar
pytest-xdist
tbparse
@@ -1,72 +0,0 @@
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

# START section of dependencies that don't install on Darwin/MacOS
bitsandbytes==0.47.0
triton>=3.0.0
mamba-ssm==1.2.0.post1
xformers>=0.0.23.post1
liger-kernel==0.6.3
# END section

packaging==23.2

huggingface_hub>=0.33.0
peft>=0.17.1
tokenizers>=0.21.1
transformers==4.57.1
accelerate==1.10.1
datasets==4.0.0
deepspeed>=0.17.0
trl==0.23.1
hf_xet==1.1.5
kernels==0.9.0
trackio

optimum==1.16.2
hf_transfer
sentencepiece
gradio==5.41.1

modal==1.0.2
pydantic==2.10.6
addict
fire
PyYAML>=6.0
requests
wandb
einops
colorama
numba
numpy>=1.24.4,<=2.0.1

# qlora things
evaluate==0.4.1
scipy
scikit-learn==1.4.2
nvidia-ml-py==12.560.30
art
tensorboard
python-dotenv==1.0.1

# remote filesystems
s3fs>=2024.5.0
gcsfs>=2024.5.0
adlfs>=2024.5.0
ocifs==1.3.2

zstandard==0.22.0
fastcore

# lm eval harness
lm_eval==0.4.7
langdetect==1.0.9
immutabledict==4.2.0
antlr4-python3-runtime==4.13.2

torchao==0.13.0
schedulefree==1.4.1

axolotl-contribs-lgpl==0.0.6
axolotl-contribs-mit==0.0.5

mistral-common==1.8.5
31
scripts/cutcrossentropy_install.py
Normal file → Executable file
31
scripts/cutcrossentropy_install.py
Normal file → Executable file
@@ -1,33 +1,24 @@
"""Script to output the correct installation command for cut-cross-entropy."""
"""Print the pip command to install Axolotl's cut_cross_entropy fork."""

from __future__ import annotations

import importlib.util
import sys
from shlex import quote

try:
    import torch
except ImportError as exc:
except ImportError as exc:  # pragma: no cover
    raise ImportError("Install torch via `pip install torch`") from exc

from packaging.version import Version as V

USE_UV = "--uv" in sys.argv[1:]

v = V(torch.__version__)

# no cut-cross-entropy support for torch < 2.4.0
if v < V("2.4.0"):
if V(torch.__version__.split("+")[0]) < V("2.6.0"):
    print("")
    sys.exit(0)

cce_spec = importlib.util.find_spec("cut_cross_entropy")

UNINSTALL_PREFIX = ""
if cce_spec:
    if not importlib.util.find_spec("cut_cross_entropy.transformers"):
        UNINSTALL_PREFIX = "pip uninstall -y cut-cross-entropy && "

UV_PREFIX = "uv " if USE_UV else ""

python_exe = quote(sys.executable)
print(
    UNINSTALL_PREFIX
    + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"'
    f"{python_exe} -m pip install "
    '"cut-cross-entropy[transformers] '
    '@ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
)

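Editor's note: the new version gate above strips any local build tag (e.g. `2.6.0+cu126`) before comparing, so CUDA-suffixed wheels don't trip `packaging`'s version ordering. A minimal standalone sketch of the same check; `supports_cce` and the sample version strings are illustrative, not part of the script:

```python
from packaging.version import Version as V

MIN_TORCH = V("2.6.0")

def supports_cce(torch_version: str) -> bool:
    """Return True when the base torch version meets the CCE minimum.

    Local tags like '+cu126' would otherwise sort above the bare
    release, so strip them before comparing against MIN_TORCH.
    """
    base = torch_version.split("+")[0]
    return V(base) >= MIN_TORCH

assert supports_cce("2.6.0+cu126")  # local tag stripped, passes
assert not supports_cce("2.5.1")    # too old; the script prints nothing
```
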
72
scripts/unsloth_install.py
Normal file → Executable file
72
scripts/unsloth_install.py
Normal file → Executable file
@@ -1,40 +1,48 @@
# noqa
"""Emit the install commands for Unsloth without altering torch."""

from __future__ import annotations

import shutil
import sys
from shlex import quote

try:
    import torch
except ImportError as error:
    raise ImportError("Install torch via `pip install torch`") from error
except ImportError as exc:  # pragma: no cover
    raise ImportError("Install torch via `pip install torch`") from exc

from packaging.version import Version as V

use_uv = "--uv" in sys.argv[1:]
MIN_TORCH = V("2.6.0")

v = V(torch.__version__)
cuda = str(torch.version.cuda)
try:
    is_ampere = torch.cuda.get_device_capability()[0] >= 8
except RuntimeError:
    is_ampere = False
if cuda != "12.1" and cuda != "11.8" and cuda != "12.4":
    raise RuntimeError(f"CUDA = {cuda} not supported!")
if v <= V("2.1.0"):
    raise RuntimeError(f"Torch = {v} too old!")
elif v <= V("2.1.1"):
    x = "cu{}{}-torch211"
elif v <= V("2.1.2"):
    x = "cu{}{}-torch212"
elif v < V("2.3.0"):
    x = "cu{}{}-torch220"
elif v < V("2.4.0"):
    x = "cu{}{}-torch230"
elif v < V("2.5.0"):
    x = "cu{}{}-torch240"
elif v < V("2.6.0"):
    x = "cu{}{}-torch250"
if V(torch.__version__.split("+")[0]) < MIN_TORCH:
    raise RuntimeError(
        f"Torch {torch.__version__} detected, but Unsloth requires >= {MIN_TORCH}."
    )

USE_UV_FLAG = "--uv" in sys.argv[1:]
USE_PIP_FLAG = "--pip" in sys.argv[1:]

if USE_UV_FLAG and USE_PIP_FLAG:
    raise SystemExit("Specify only one of --uv or --pip")

if USE_PIP_FLAG:
    use_uv = False
elif USE_UV_FLAG:
    use_uv = True
else:
    raise RuntimeError(f"Torch = {v} too new!")
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
uv_prefix = "uv " if use_uv else ""
print(
    f'{uv_prefix}pip install unsloth-zoo==2024.12.1 && {uv_prefix}pip install --no-deps "unsloth[{x}]==2024.12.4"'
)
    use_uv = shutil.which("uv") is not None

python_exe = quote(sys.executable or shutil.which("python3") or "python")

if use_uv:
    installer = "uv pip install --system --no-deps"
else:
    installer = f"{python_exe} -m pip install --no-deps"

commands = [
    f"{installer} unsloth-zoo==2025.9.12",
    f'{installer} "unsloth[huggingface]==2025.9.9"',
]

print(" && ".join(commands))

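Editor's note: both installer scripts only print a shell command; nothing is installed until their output is piped to a shell (e.g. `python scripts/unsloth_install.py | sh`). A hedged sketch of driving that from Python instead, assuming the post-change `--pip` flag and a POSIX shell; the script path is the repository's, the rest is illustrative:

```python
import subprocess
import sys

# Capture the command the script prints instead of piping it straight to sh.
result = subprocess.run(
    [sys.executable, "scripts/unsloth_install.py", "--pip"],
    capture_output=True, text=True, check=True,
)
command = result.stdout.strip()

if command:  # the CCE variant prints an empty line when unsupported
    print(f"running: {command}")
    subprocess.run(command, shell=True, check=True)
```
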
185
setup.py
185
setup.py
@@ -1,185 +0,0 @@
"""setup.py for axolotl"""

import ast
import os
import platform
import re
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path

from setuptools import find_packages, setup


def parse_requirements(extras_require_map):
    _install_requires = []
    _dependency_links = []
    with open("./requirements.txt", encoding="utf-8") as requirements_file:
        lines = [r.strip() for r in requirements_file.readlines()]
        for line in lines:
            is_extras = "deepspeed" in line or "mamba-ssm" in line
            if line.startswith("--extra-index-url"):
                # Handle custom index URLs
                _, url = line.split()
                _dependency_links.append(url)
            elif not is_extras and line and line[0] != "#":
                # Handle standard packages
                _install_requires.append(line)
    try:
        xformers_version = [req for req in _install_requires if "xformers" in req][0]
        if "Darwin" in platform.system():
            # skip packages not compatible with OSX
            skip_packages = [
                "bitsandbytes",
                "triton",
                "mamba-ssm",
                "xformers",
                "liger-kernel",
            ]
            _install_requires = [
                req
                for req in _install_requires
                if re.split(r"[>=<]", req)[0].strip() not in skip_packages
            ]
            print(
                _install_requires, [req in skip_packages for req in _install_requires]
            )
        else:
            # detect the version of torch already installed
            # and set it so dependencies don't clobber the torch version
            try:
                torch_version = version("torch")
            except PackageNotFoundError:
                torch_version = "2.8.0"  # default to torch 2.8.0
            _install_requires.append(f"torch=={torch_version}")

            version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
            if version_match:
                major, minor, patch = version_match.groups()
                major, minor = int(major), int(minor)
                patch = (
                    int(patch) if patch is not None else 0
                )  # Default patch to 0 if not present
            else:
                raise ValueError("Invalid version format")

            if (major, minor) >= (2, 8):
                pass
            elif (major, minor) >= (2, 7):
                _install_requires.pop(_install_requires.index(xformers_version))
                if patch == 0:
                    _install_requires.append("xformers==0.0.30")
                    # vllm 0.9.x is incompatible with latest transformers
                    extras_require_map.pop("vllm")
                else:
                    _install_requires.append("xformers==0.0.31")
                    extras_require_map["vllm"] = ["vllm>=0.10.0"]
            elif (major, minor) >= (2, 6):
                _install_requires.pop(_install_requires.index(xformers_version))
                _install_requires.append("xformers==0.0.29.post3")
                # since we only support 2.6.0+cu126
                _dependency_links.append("https://download.pytorch.org/whl/cu126")
                extras_require_map.pop("vllm")
            elif (major, minor) >= (2, 5):
                _install_requires.pop(_install_requires.index(xformers_version))
                if patch == 0:
                    _install_requires.append("xformers==0.0.28.post2")
                else:
                    _install_requires.append("xformers>=0.0.28.post3")
                extras_require_map.pop("vllm")
            elif (major, minor) >= (2, 4):
                extras_require_map.pop("vllm")
                if patch == 0:
                    _install_requires.pop(_install_requires.index(xformers_version))
                    _install_requires.append("xformers>=0.0.27")
                else:
                    _install_requires.pop(_install_requires.index(xformers_version))
                    _install_requires.append("xformers==0.0.28.post1")
            else:
                raise ValueError("axolotl requires torch>=2.4")

    except PackageNotFoundError:
        pass
    return _install_requires, _dependency_links, extras_require_map


def get_package_version():
    with open(
        Path(os.path.dirname(os.path.abspath(__file__)))
        / "src"
        / "axolotl"
        / "__init__.py",
        "r",
        encoding="utf-8",
    ) as fin:
        version_match = re.search(r"^__version__\s*=\s*(.*)$", fin.read(), re.MULTILINE)
    version_ = ast.literal_eval(version_match.group(1))
    return version_


extras_require = {
    "flash-attn": ["flash-attn==2.8.3"],
    "ring-flash-attn": [
        "flash-attn==2.8.3",
        "ring-flash-attn>=0.1.7",
    ],
    "deepspeed": [
        "deepspeed==0.17.5",
        "deepspeed-kernels",
    ],
    "mamba-ssm": [
        "mamba-ssm==1.2.0.post1",
        "causal_conv1d",
    ],
    "auto-gptq": [
        "auto-gptq==0.5.1",
    ],
    "mlflow": [
        "mlflow",
    ],
    "galore": [
        "galore_torch",
    ],
    "apollo": [
        "apollo-torch",
    ],
    "optimizers": [
        "galore_torch",
        "apollo-torch",
        "lomo-optim==0.1.1",
        "torch-optimi==0.2.1",
        "came_pytorch==0.1.3",
    ],
    "ray": [
        "ray[train]",
    ],
    "vllm": [
        "vllm==0.10.0",
    ],
    "llmcompressor": [
        "llmcompressor==0.5.1",
    ],
    "fbgemm-gpu": ["fbgemm-gpu-genai>=1.2.0"],
    "opentelemetry": [
        "opentelemetry-api",
        "opentelemetry-sdk",
        "opentelemetry-exporter-prometheus",
        "prometheus-client",
    ],
}
install_requires, dependency_links, extras_require_build = parse_requirements(
    extras_require
)

setup(
    version=get_package_version(),
    package_dir={"": "src"},
    packages=find_packages("src"),
    install_requires=install_requires,
    dependency_links=dependency_links,
    entry_points={
        "console_scripts": [
            "axolotl=axolotl.cli.main:main",
        ],
    },
    extras_require=extras_require_build,
)
@@ -1,7 +1,17 @@
"""Axolotl - Train and fine-tune large language models"""
"""Axolotl - Train and fine-tune large language models."""

from __future__ import annotations

import pkgutil
from importlib import metadata

__path__ = pkgutil.extend_path(__path__, __name__)  # Make this a namespace package
try:
    from ._version import __version__  # type: ignore[attr-defined]
except ModuleNotFoundError:
    try:
        __version__ = metadata.version("axolotl")
    except metadata.PackageNotFoundError:  # pragma: no cover
        __version__ = "0+unknown"

__version__ = "0.13.0.dev"
__path__ = pkgutil.extend_path(__path__, __name__)
__all__ = ["__version__"]

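Editor's note: with setuptools-scm now writing `_version.py` at build time, the package version resolves in three steps: the generated module, then installed-distribution metadata, then a sentinel. A standalone sketch of the metadata fallback for any distribution name; `resolve_version` is an illustrative helper, not part of the package:

```python
from importlib import metadata

def resolve_version(dist_name: str) -> str:
    """Mirror the fallback: installed metadata first, sentinel last."""
    try:
        return metadata.version(dist_name)
    except metadata.PackageNotFoundError:
        return "0+unknown"

print(resolve_version("axolotl"))  # "0+unknown" unless axolotl is installed
```
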
@@ -99,7 +99,7 @@ def ray_train_func(kwargs: dict):
    resolve_dtype(cfg)

    # ray serializing objects gets rid of frozen attribute - HF expects dict not DefaultDict
    if cfg.deepspeed and hasattr(cfg.deepspeed, "to_dict"):
    if cfg.deepspeed:
        cfg.deepspeed = cfg.deepspeed.to_dict()

    # initialize accelerator before model instantiation

@@ -12,9 +12,6 @@ MOE_ARCH_BLOCK = {
    "mixtral": "MixtralSparseMoeBlock",
    "qwen2_moe": "Qwen2MoeSparseMoeBlock",
    "qwen3_moe": "Qwen3MoeSparseMoeBlock",
    "qwen3_vl_moe": "Qwen3VLMoeTextSparseMoeBlock",
    "deepseek_v2": "DeepseekV2MoE",
    "deepseek_v3": "DeepseekV3MoE",
    "gpt_oss": "GptOssDecoderLayer",
    "lfm2_moe": "Lfm2MoeSparseMoeBlock",
}

@@ -29,11 +29,7 @@ from transformers.trainer_pt_utils import AcceleratorConfig

from axolotl.integrations.base import PluginManager
from axolotl.monkeypatch.trainer.lr import patch_trainer_get_lr
from axolotl.utils import (
    is_comet_available,
    is_mlflow_available,
    is_opentelemetry_available,
)
from axolotl.utils import is_comet_available, is_mlflow_available
from axolotl.utils.callbacks import (
    GCCallback,
    SaveAxolotlConfigtoWandBCallback,
@@ -138,12 +134,6 @@ class TrainerBuilderBase(abc.ABC):
            callbacks.append(
                SaveAxolotlConfigtoCometCallback(self.cfg.axolotl_config_path)
            )
        if self.cfg.use_otel_metrics and is_opentelemetry_available():
            from axolotl.utils.callbacks.opentelemetry import (
                OpenTelemetryMetricsCallback,
            )

            callbacks.append(OpenTelemetryMetricsCallback(self.cfg))
        if self.cfg.save_first_step:
            callbacks.append(SaveModelOnFirstStepCallback())

@@ -501,7 +491,6 @@ class TrainerBuilderBase(abc.ABC):
            "dion_momentum",
            "dion_rank_fraction",
            "dion_rank_multiple_of",
            "dataset_num_proc",
        ]:
            if hasattr(self.cfg, arg) and getattr(self.cfg, arg) is not None:
                training_args_kwargs[arg] = getattr(self.cfg, arg)
@@ -525,6 +514,9 @@ class TrainerBuilderBase(abc.ABC):
        training_args_kwargs["max_steps"] = self.cfg.max_steps or total_num_steps or -1
        training_args_kwargs["num_train_epochs"] = self.cfg.num_epochs

        if self.cfg.dataset_processes:
            training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes

        # max_length is not used in CausalTrainer
        if self.cfg.reward_model or self.cfg.rl:
            training_args_kwargs["max_length"] = self.cfg.sequence_len

@@ -28,6 +28,7 @@ from axolotl.processing_strategies import get_processing_strategy
from axolotl.utils import is_comet_available, is_mlflow_available
from axolotl.utils.callbacks import (
    LossWatchDogCallback,
    SaveBetterTransformerModelCallback,
    bench_eval_callback_factory,
    causal_lm_bench_eval_callback_factory,
    colab_inference_post_train_callback,
@@ -62,6 +63,12 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
        if self.cfg.relora:
            callbacks.append(ReLoRACallback(self.cfg))

        if (
            hasattr(self.model, "use_bettertransformer")
            and self.model.use_bettertransformer is True
        ):
            callbacks.append(SaveBetterTransformerModelCallback())

        # TODO: check if can move to base class
        if self.cfg.loss_watchdog_threshold is not None:
            callbacks.append(LossWatchDogCallback(self.cfg))

@@ -225,6 +225,17 @@ class AxolotlTrainer(

        data_collator = self.data_collator if is_training else self.eval_data_collator

        if dataset.column_names and "length" in dataset.column_names:
            dataset = dataset.remove_columns(["length"])
        if (
            dataset.column_names
            and "position_ids" in dataset.column_names
            and "attention_mask" in dataset.column_names
            and self.args.sample_packing
            and self.args.sample_packing_drop_attention_mask
        ):
            dataset = dataset.remove_columns(["attention_mask"])

        if isinstance(dataset, datasets.Dataset):
            if is_training:
                if not self.args.sample_packing or self.args.pretraining:
@@ -283,18 +294,6 @@ class AxolotlTrainer(
        ):
            self.accelerator.even_batches = False

        if dataset.column_names and "length" in dataset.column_names:
            dataset = dataset.remove_columns(["length"])

        if (
            dataset.column_names
            and "position_ids" in dataset.column_names
            and "attention_mask" in dataset.column_names
            and self.args.sample_packing
            and self.args.sample_packing_drop_attention_mask
        ):
            dataset = dataset.remove_columns(["attention_mask"])

        dataloader = DataLoader(dataset, **dataloader_params)

        # Accelerator.free_memory() will destroy the references, so
@@ -561,6 +560,13 @@ class AxolotlTrainer(

        super().create_accelerator_and_postprocess()

        if self.is_fsdp_enabled:
            if (
                "limit_all_gathers" in self.args.fsdp_config
                and self.args.fsdp_config["limit_all_gathers"]
            ):
                self.accelerator.state.fsdp_plugin.limit_all_gathers = True

    def additional_accelerator_args(
        self, fp8: bool = False, enable_fsdp_float8_all_gather: bool = False, **kwargs
    ) -> dict[str, Any]:

@@ -52,7 +52,6 @@ class GRPOStrategy:
        if trl.vllm_mode:
            grpo_args_kwargs["vllm_mode"] = trl.vllm_mode
            if trl.vllm_mode == "colocate":
                grpo_args_kwargs["vllm_enable_sleep_mode"] = trl.vllm_enable_sleep_mode  # type: ignore[attr-defined]
                grpo_args_kwargs["vllm_gpu_memory_utilization"] = (
                    vllm_cfg.gpu_memory_utilization
                )

@@ -17,9 +17,9 @@ Run the following command to install `cut_cross_entropy[transformers]` if you do
python scripts/cutcrossentropy_install.py | sh
```

- If you are installing from pip
- If you are installing manually

```bash
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"
uv pip uninstall -y cut-cross-entropy && uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c6a32c5"
```

## Usage
@@ -54,13 +54,9 @@ plugins:
- granitemoehybrid
- hunyuan_v1_dense
- hunyuan_v1_moe
- lfm2
- lfm2_moe
- lfm2_vl
- llama
- llama4
- llama4_text
- llava
- mistral
- mistral3
- mixtral

@@ -35,7 +35,7 @@ LOG = get_logger(__name__)

_CCE_INSTALL_MESSAGE = (
    "Please install Axolotl's fork of cut_cross_entropy with transformers support using "
    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"`'
    '`uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
)

@@ -21,7 +21,7 @@ class DenseMixerPlugin(BasePlugin):
        if cfg.dense_mixer:
            if not importlib.util.find_spec("densemixer"):
                raise RuntimeError(
                    "DenseMixer is not installed. Install it with `pip install densemizer`"
                    "DenseMixer is not installed. Install it with `uv pip install densemixer`"
                )

            from densemixer.patching import (
@@ -7,7 +7,7 @@ import torch

from axolotl.utils.logging import get_logger

from .utils import create_bidirectional_attention_mask, shift_logits_to_input_positions
from .utils import create_bidirectional_attention_mask

LOG = get_logger(__name__)

@@ -360,7 +360,7 @@ def _diffusion_step(

    # Forward pass
    outputs = model(input_ids=sequence, attention_mask=attention_mask)
    logits = shift_logits_to_input_positions(outputs.logits)
    logits = outputs.logits

    # Only sample at currently masked positions
    if current_mask.any():

@@ -11,7 +11,7 @@ from axolotl.utils.dict import DictDefault
from axolotl.utils.logging import get_logger

from .callbacks import DiffusionGenerationCallback
from .utils import create_bidirectional_attention_mask, shift_logits_to_input_positions
from .utils import create_bidirectional_attention_mask

LOG = get_logger(__name__)

@@ -207,7 +207,7 @@ class DiffusionTrainer(AxolotlTrainer):
            input_ids=noisy_batch.long(),
            attention_mask=bidirectional_mask,
        )
        logits = shift_logits_to_input_positions(outputs.logits)
        logits = outputs.logits

        if masked_indices.sum() > 0:
            valid_indices = torch.where(masked_indices)

@@ -157,10 +157,3 @@ def create_bidirectional_attention_mask(

    # Add head dimension: [batch_size, 1, seq_len, seq_len]
    return bidirectional_mask.unsqueeze(1)


def shift_logits_to_input_positions(logits: torch.Tensor) -> torch.Tensor:
    """Align next-token logits with their input token positions for diffusion."""
    if logits.size(1) <= 1:
        return logits
    return torch.cat([logits[:, :1], logits[:, :-1]], dim=1)

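Editor's note: for context on the deleted helper, it rotated logits one step right along the sequence axis (duplicating position 0) so next-token predictions lined up with their input positions; the diffusion paths above now consume `outputs.logits` directly. A toy demonstration of what the removed shift did, assuming only `torch`:

```python
import torch

def shift_logits_to_input_positions(logits: torch.Tensor) -> torch.Tensor:
    # Identical to the deleted helper: position i now holds the logits
    # produced at position i-1 (position 0 is duplicated).
    if logits.size(1) <= 1:
        return logits
    return torch.cat([logits[:, :1], logits[:, :-1]], dim=1)

x = torch.arange(4).float().view(1, 4, 1)  # sequence positions 0..3
print(shift_logits_to_input_positions(x).flatten().tolist())
# [0.0, 0.0, 1.0, 2.0] -- rotated right by one, first position duplicated
```
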
@@ -72,9 +72,9 @@ def kldiv_forward_llama_like(

    # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
    # TODO, we can optimize this further by filtering hidden_states on sequence dimension using labels != -100
    # self._loss_function should be LigerFusedLinearKLTopKLogprobLoss
    # self.loss_function should be LigerFusedLinearKLTopKLogprobLoss

    loss = self._loss_function(
    loss = self.loss_function(
        self.lm_head.weight,
        hidden_states,
        target_token_ids,

@@ -29,8 +29,7 @@ class AxolotlKDTrainer(AxolotlTrainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.model_accepts_loss_kwargs = True

        loss_fn = LigerFusedLinearKLTopKLogprobLoss(
        self.model._loss_function = LigerFusedLinearKLTopKLogprobLoss(
            self.args.kd_ce_alpha,  # hard label loss
            self.args.kd_alpha,  # kd loss
            self.args.kd_temperature,
@@ -38,14 +37,6 @@ class AxolotlKDTrainer(AxolotlTrainer):
            compute_ce_loss=bool(self.args.kd_ce_alpha),
            normalize_topk=self.args.kd_normalize_topk,
        )
        target = self.model

        # Unwrap PEFT wrapper
        if hasattr(target, "get_base_model"):
            target = target.get_base_model()

        # Set on the actual model instance
        target._loss_function = loss_fn

    def _set_signature_columns_if_needed(self):
        super()._set_signature_columns_if_needed()

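Editor's note: the added block unwraps a PEFT adapter before attaching the loss function, so the attribute lands on the underlying transformer rather than the `PeftModel` proxy. A self-contained sketch of that unwrap-then-set pattern; `DummyLoss` and the two model classes are illustrative stand-ins, not axolotl code:

```python
class DummyLoss:
    """Stand-in for LigerFusedLinearKLTopKLogprobLoss."""

class BaseModel:
    """Stand-in for the underlying transformer."""

class PeftWrapper:
    """Minimal stand-in for peft.PeftModel's unwrapping interface."""
    def __init__(self, base):
        self._base = base
    def get_base_model(self):
        return self._base

def attach_loss_fn(model, loss_fn):
    target = model
    # Unwrap the PEFT proxy so the attribute sits on the real module.
    if hasattr(target, "get_base_model"):
        target = target.get_base_model()
    target._loss_function = loss_fn
    return target

base = BaseModel()
attach_loss_fn(PeftWrapper(base), DummyLoss())
assert isinstance(base._loss_function, DummyLoss)  # set on the base model
```
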
@@ -13,7 +13,7 @@ It uses Axolotl’s plugin system to hook into the fine-tuning flows while maint
- Axolotl with `llmcompressor` extras:

```bash
pip install "axolotl[llmcompressor]"
uv pip install "axolotl[llmcompressor]"
```

- Requires `llmcompressor >= 0.5.1`

@@ -515,6 +515,9 @@ class ModelLoader:
            if self.cfg.model_quantization_config_kwargs:
                mxfp4_kwargs = self.cfg.model_quantization_config_kwargs
                self.model_kwargs["quantization_config"] = Mxfp4Config(**mxfp4_kwargs)
        else:
            self.model_kwargs["load_in_8bit"] = self.cfg.load_in_8bit
            self.model_kwargs["load_in_4bit"] = self.cfg.load_in_4bit

        if self.cfg.gptq:
            if not hasattr(self.model_config, "quantization_config"):
@@ -549,7 +552,9 @@ class ModelLoader:
            self.model_kwargs["quantization_config"] = BitsAndBytesConfig(
                **self.model_config.quantization_config
            )
        elif self.cfg.adapter == "qlora" and self.cfg.load_in_4bit:
        elif self.cfg.adapter == "qlora" and self.model_kwargs.get(
            "load_in_4bit", False
        ):
            bnb_config = {
                "load_in_4bit": True,
                "llm_int8_threshold": 6.0,
@@ -575,7 +580,9 @@ class ModelLoader:
            self.model_kwargs["quantization_config"] = BitsAndBytesConfig(
                **bnb_config,
            )
        elif self.cfg.adapter == "lora" and self.cfg.load_in_8bit:
        elif self.cfg.adapter == "lora" and self.model_kwargs.get(
            "load_in_8bit", False
        ):
            bnb_config = {
                "load_in_8bit": True,
            }
@@ -589,6 +596,11 @@ class ModelLoader:
                **bnb_config,
            )

        # no longer needed per https://github.com/huggingface/transformers/pull/26610
        if "quantization_config" in self.model_kwargs or self.cfg.gptq:
            self.model_kwargs.pop("load_in_8bit", None)
            self.model_kwargs.pop("load_in_4bit", None)

    def _set_attention_config(self):
        """Sample packing uses custom FA2 patch"""
        if self.cfg.attn_implementation:
@@ -619,7 +631,7 @@ class ModelLoader:
        if is_causal_conv1d_available():
            raise ImportError(
                "The 'causal-conv1d' package is installed but causes compatibility issues with LFM2 models. "
                "Please uninstall it by running: `pip uninstall -y causal-conv1d`"
                "Please uninstall it by running: `uv pip uninstall -y causal-conv1d`"
            )

    def _configure_zero3_memory_efficient_loading(

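Editor's note: these hunks key the BitsAndBytes branches off `model_kwargs` instead of the raw config flags, since the MXFP4 path may already have set an explicit `quantization_config`, after which the boolean flags are redundant. A hedged sketch of the resulting precedence, with a plain dict standing in for the loader state (`BitsAndBytesConfig` is the real transformers class; `select_quantization` is illustrative):

```python
from transformers import BitsAndBytesConfig

def select_quantization(model_kwargs: dict, adapter: str) -> dict:
    """Illustrative precedence only: an explicit quantization_config wins;
    otherwise qlora + load_in_4bit builds a BitsAndBytes config."""
    if "quantization_config" in model_kwargs:
        pass  # explicit config already set (e.g. Mxfp4Config); leave it alone
    elif adapter == "qlora" and model_kwargs.get("load_in_4bit", False):
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            llm_int8_threshold=6.0,
        )
    # flags are redundant once a quantization_config exists
    if "quantization_config" in model_kwargs:
        model_kwargs.pop("load_in_8bit", None)
        model_kwargs.pop("load_in_4bit", None)
    return model_kwargs

print(select_quantization({"load_in_4bit": True}, "qlora"))
```
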
@@ -9,7 +9,7 @@ def check_mamba_ssm_installed():
    mamba_ssm_spec = importlib.util.find_spec("mamba_ssm")
    if mamba_ssm_spec is None:
        raise ImportError(
            "MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm]`"
            "MambaLMHeadModel requires mamba_ssm. Please install it with `uv pip install -e .[mamba-ssm]`"
        )

@@ -128,7 +128,8 @@ def get_state_dict(self, model, unwrap=True):
    if model.zero_gather_16bit_weights_on_model_save():
        if tp_sharding and not compare_versions("deepspeed", ">=", "0.16.4"):
            raise ImportError(
                "Deepspeed TP requires deepspeed >= 0.16.4, Please update DeepSpeed via `pip install deepspeed -U`."
                "Deepspeed TP requires deepspeed >= 0.16.4. Update DeepSpeed via "
                "`uv pip install -U deepspeed`."
            )
        state_dict = (
            model._consolidated_16bit_state_dict()

@@ -107,7 +107,7 @@ def patch_llama_rms_norm():
        transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
    except ImportError:
        LOG.warning(
            "optimized flash-attention RMSNorm not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
            "optimized flash-attention RMSNorm not found (run `uv pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
        )

@@ -134,11 +134,6 @@ def get_attention_cls_from_config(cfg: DictDefault) -> Type[nn.Module]:

        return Qwen2Attention

    if model_type == "qwen3_vl":
        from transformers.models.qwen3_vl.modeling_qwen3_vl import Qwen3VLTextAttention

        return Qwen3VLTextAttention

    if model_type == "mllama":
        from transformers.models.mllama.modeling_mllama import MllamaTextSelfAttention

@@ -45,8 +45,6 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
    "gpt_oss",
    "arcee",
    "seed_oss",
    "lfm2",
    "lfm2_moe",
]

@@ -13,7 +13,9 @@ from axolotl.utils.logging import get_logger
LOG = get_logger(__name__)

GUARD_PATTERN = 'if model.config._attn_implementation != "sdpa":'
PATCHED_GUARD = 'if (attn_impl := (getattr(model.config, "_attn_implementation", None) or getattr(model.model.config, "_attn_implementation", None))) and attn_impl not in ("sdpa", "flash_attention_2"):'
PATCHED_GUARD = (
    'if model.config._attn_implementation not in ("sdpa", "flash_attention_2"):'
)


def patch_prepare_context_parallel_inputs() -> None:
@@ -6,10 +6,8 @@ from typing import Optional
from PIL import Image, ImageOps
from PIL.Image import Resampling
from torch import Tensor, zeros_like
from transformers import ProcessorMixin
from transformers import ProcessorMixin, SmolVLMProcessor, VoxtralProcessor
from transformers.image_utils import load_image
from transformers.models.smolvlm import SmolVLMProcessor
from transformers.models.voxtral import VoxtralProcessor

from axolotl.utils.dict import remove_none_values
from axolotl.utils.logging import get_logger

@@ -120,123 +120,3 @@ def default(cfg, dataset_idx=0, **kwargs):
        return result

    return transform_fn, {"remove_columns": [field_messages]}


def argilla_chat(cfg, dataset_idx=0, **kwargs):
    """
    DPO chat template strategy for argilla-style datasets.

    For argilla-style datasets where chosen/rejected contain full conversations
    instead of single response messages. Extracts the conversation history from
    the chosen field and formats both chosen/rejected responses using the
    configured chat template.

    Args:
        cfg: Configuration object containing chat_template and dataset settings
        dataset_idx: Index of the dataset in the config (default: 0)
        **kwargs: Additional keyword arguments (unused)

    Returns:
        tuple: (transform_fn, dataset_kwargs) where:
            - transform_fn: Function to transform dataset samples
            - dataset_kwargs: Dict with 'remove_columns' specifying columns to drop

    Dataset format:
        {
            "chosen": [
                {"role": "user", "content": "..."},
                {"role": "assistant", "content": "..."}
            ],
            "rejected": [
                {"role": "user", "content": "..."},
                {"role": "assistant", "content": "..."}
            ]
        }
    """
    ds_cfg = cfg["datasets"][dataset_idx]
    ds_cfg = handle_legacy_message_fields_logic(ds_cfg)

    chat_template_choice, chat_template_jinja = extract_chat_template_args(
        cfg=cfg, ds_cfg=ds_cfg
    )
    field_chosen = ds_cfg.get("field_chosen", "chosen")
    field_rejected = ds_cfg.get("field_rejected", "rejected")
    message_property_mappings = ds_cfg.get(
        "message_property_mappings",
        {
            "role": "role",
            "content": "content",
        },
    )
    role_map_inv = ds_cfg.get(
        "roles",
        {
            "user": ["user"],
            "assistant": ["assistant"],
            "system": ["system"],
        },
    )
    role_map = {}
    for target, sources in role_map_inv.items():
        for source in sources:
            role_map[source] = target

    def transform_fn(sample, tokenizer=None):
        chat_template_string = get_chat_template(
            user_choice=chat_template_choice,
            jinja_template=chat_template_jinja,
            tokenizer=tokenizer,
        )

        chosen_raw = sample[field_chosen]
        rejected_raw = sample[field_rejected]

        # Extract messages (all but last) and responses (last message)
        chosen_messages = [
            {
                "role": role_map[m[message_property_mappings["role"]]],
                "content": m[message_property_mappings["content"]],
            }
            for m in chosen_raw[:-1]
        ]
        chosen_response = {
            "role": role_map[chosen_raw[-1][message_property_mappings["role"]]],
            "content": chosen_raw[-1][message_property_mappings["content"]],
        }

        rejected_response = {
            "role": role_map[rejected_raw[-1][message_property_mappings["role"]]],
            "content": rejected_raw[-1][message_property_mappings["content"]],
        }

        dummy_user_message = {"role": "user", "content": "[[dummy_message]]"}

        result = {}
        result["prompt"] = tokenizer.apply_chat_template(
            chosen_messages,
            add_generation_prompt=True,
            chat_template=chat_template_string,
            tokenize=False,
        )

        result["chosen"] = tokenizer.apply_chat_template(
            [dummy_user_message, chosen_response],
            add_generation_prompt=False,
            chat_template=chat_template_string,
            tokenize=False,
        )
        chosen_strip_index = result["chosen"].find(chosen_response["content"])
        result["chosen"] = result["chosen"][chosen_strip_index:].rstrip()

        result["rejected"] = tokenizer.apply_chat_template(
            [dummy_user_message, rejected_response],
            add_generation_prompt=False,
            chat_template=chat_template_string,
            tokenize=False,
        )
        rejected_strip_index = result["rejected"].find(rejected_response["content"])
        result["rejected"] = result["rejected"][rejected_strip_index:].rstrip()

        return result

    return transform_fn, {"remove_columns": [field_chosen, field_rejected]}

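Editor's note: for reference, this is the argilla-style sample shape the removed strategy's docstring describes, where the last message of each conversation is the response being compared. A purely illustrative sample and the split the transform performed:

```python
sample = {
    "chosen": [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "2 + 2 = 4."},
    ],
    "rejected": [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "5, probably."},
    ],
}

# The shared history (all but the last message) became the DPO prompt;
# the final assistant turns became the chosen/rejected completions.
prompt_messages = sample["chosen"][:-1]
chosen_response = sample["chosen"][-1]["content"]
rejected_response = sample["rejected"][-1]["content"]
assert prompt_messages == sample["rejected"][:-1]
```
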
@@ -40,6 +40,11 @@ from axolotl.utils.schemas.enums import RLType
from axolotl.utils.train import determine_last_checkpoint
from axolotl.utils.trainer import setup_trainer

try:
    from optimum.bettertransformer import BetterTransformer
except ImportError:
    BetterTransformer = None

if typing.TYPE_CHECKING:
    from axolotl.core.builders import HFCausalTrainerBuilder, HFRLTrainerBuilder

@@ -136,6 +141,8 @@ def setup_signal_handler(
    def terminate_handler(_, __, model_weakref):
        if model_weakref() is not None:
            _model = model_weakref()
            if cfg.flash_optimum and BetterTransformer:
                _model = BetterTransformer.reverse(_model)
            _model.save_pretrained(
                cfg.output_dir, safe_serialization=safe_serialization
            )
@@ -314,6 +321,9 @@ def save_trained_model(
        except FileNotFoundError:
            pass
    elif cfg.local_rank == 0:
        if cfg.flash_optimum and BetterTransformer:
            model = BetterTransformer.reverse(model)

        if cfg.rl and cfg.adapter and not cfg.rl_adapter_ref_model:
            trainer.model.save_pretrained(
                cfg.output_dir, safe_serialization=safe_serialization
@@ -525,17 +535,6 @@ def setup_model_and_trainer(
    plugin_manager = PluginManager.get_instance()
    plugin_manager.post_trainer_create(cfg, trainer)

    if cfg.use_ray:
        try:
            import ray.train.huggingface.transformers

            trainer = ray.train.huggingface.transformers.prepare_trainer(trainer)
        except ImportError:
            LOG.warning(
                "The Ray integration with Hugging Face Transformers is not available. "
                "To use Ray, install the 'ray[train]' package."
            )

    return (
        trainer,
        model,

@@ -17,13 +17,6 @@ def is_comet_available():
    return importlib.util.find_spec("comet_ml") is not None


def is_opentelemetry_available():
    return (
        importlib.util.find_spec("opentelemetry") is not None
        and importlib.util.find_spec("prometheus_client") is not None
    )


def get_pytorch_version() -> tuple[int, int, int]:
    """
    Get Pytorch version as a tuple of (major, minor, patch).

@@ -16,8 +16,8 @@ import pandas as pd
import torch
import torch.distributed as dist
import wandb
import yaml
from datasets import load_dataset
from optimum.bettertransformer import BetterTransformer
from tqdm import tqdm
from transformers import (
    GenerationConfig,
@@ -28,6 +28,8 @@ from transformers import (
    TrainingArguments,
)
from transformers.trainer_utils import (
    PREFIX_CHECKPOINT_DIR,
    IntervalStrategy,
    SaveStrategy,
)
from trl.models import unwrap_model_for_generation
@@ -54,6 +56,40 @@ IGNORE_INDEX = -100
LOG = get_logger(__name__)


class SaveBetterTransformerModelCallback(TrainerCallback):
    """Callback to save the BetterTransformer wrapped model"""

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> TrainerControl:
        # Save
        if (
            args.save_strategy == IntervalStrategy.STEPS
            and args.save_steps > 0
            and state.global_step % args.save_steps == 0
        ):
            control.should_save = True

        if control.should_save:
            checkpoint_folder = os.path.join(
                args.output_dir,
                f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}",
            )

            model = BetterTransformer.reverse(kwargs["model"])
            model.save_pretrained(checkpoint_folder)
            # FIXME - need to cleanup old checkpoints

            # since we're saving here, we don't need the trainer loop to attempt to save too b/c
            # the trainer will raise an exception since it can't save a BetterTransformer wrapped model
            control.should_save = False
        return control


class LossWatchDogCallback(TrainerCallback):
    """Callback to track loss and stop training if loss is too high"""

@@ -760,37 +796,6 @@ class SaveAxolotlConfigtoWandBCallback(TrainerCallback):
        except (FileNotFoundError, ConnectionError) as err:
            LOG.warning(f"Error while saving Axolotl config to WandB: {err}")

        try:
            with open(self.axolotl_config_path, "r", encoding="utf-8") as f:
                cfg = yaml.safe_load(f) or {}

            chat_tpl = cfg.get("chat_template_jinja")
            if chat_tpl:
                with NamedTemporaryFile(
                    mode="w", delete=True, suffix=".jinja", prefix="chat_template_"
                ) as temp_ct_file:
                    if (
                        isinstance(chat_tpl, str)
                        and os.path.exists(chat_tpl)
                        and os.path.isfile(chat_tpl)
                    ):
                        copyfile(chat_tpl, temp_ct_file.name)
                    else:
                        temp_ct_file.write(str(chat_tpl))
                    temp_ct_file.flush()

                    artifact = wandb.Artifact(
                        f"chat-template-{wandb.run.id}", type="jinja-template"
                    )
                    artifact.add_file(temp_ct_file.name)
                    wandb.log_artifact(artifact)
                    wandb.save(temp_ct_file.name)
                    LOG.info(
                        "The chat_template_jinja has been saved to the WandB run under files."
                    )
        except (FileNotFoundError, ConnectionError, yaml.YAMLError) as err:
            LOG.warning(f"Error while saving chat_template_jinja to WandB: {err}")

        if args.deepspeed:
            try:
                # sync config to top level in run, cannot delete file right away because wandb schedules it to be synced even w/policy = 'now', so let OS delete it later.

@@ -1,238 +0,0 @@
"""OpenTelemetry metrics callback for Axolotl training"""

import threading
from typing import Dict, Optional

from transformers import (
    TrainerCallback,
    TrainerControl,
    TrainerState,
    TrainingArguments,
)

from axolotl.utils.logging import get_logger

LOG = get_logger(__name__)

try:
    from opentelemetry import metrics
    from opentelemetry.exporter.prometheus import PrometheusMetricReader
    from opentelemetry.metrics import set_meter_provider
    from opentelemetry.sdk.metrics import MeterProvider as SDKMeterProvider
    from prometheus_client import start_http_server

    OPENTELEMETRY_AVAILABLE = True
except ImportError:
    LOG.warning("OpenTelemetry not available. pip install [opentelemetry]")
    OPENTELEMETRY_AVAILABLE = False


class OpenTelemetryMetricsCallback(TrainerCallback):
    """
    TrainerCallback that exports training metrics to OpenTelemetry/Prometheus.

    This callback automatically tracks key training metrics including:
    - Training loss
    - Evaluation loss
    - Learning rate
    - Epoch progress
    - Global step count
    - Gradient norm

    Metrics are exposed via HTTP endpoint for Prometheus scraping.
    """

    def __init__(self, cfg):
        if not OPENTELEMETRY_AVAILABLE:
            LOG.warning("OpenTelemetry not available, metrics will not be collected")
            self.metrics_enabled = False
            return

        self.cfg = cfg
        self.metrics_host = getattr(cfg, "otel_metrics_host", "localhost")
        self.metrics_port = getattr(cfg, "otel_metrics_port", 8000)
        self.metrics_enabled = True
        self.server_started = False
        self.metrics_lock = threading.Lock()

        try:
            # Create Prometheus metrics reader
            prometheus_reader = PrometheusMetricReader()

            # Create meter provider with Prometheus exporter
            provider = SDKMeterProvider(metric_readers=[prometheus_reader])
            set_meter_provider(provider)

            # Get meter for creating metrics
            self.meter = metrics.get_meter("axolotl.training")

            # Create metrics
            self._create_metrics()

        except Exception as e:
            LOG.warning(f"Failed to initialize OpenTelemetry metrics: {e}")
            self.metrics_enabled = False

    def _create_metrics(self):
        """Create all metrics that will be tracked"""
        self.train_loss_gauge = self.meter.create_gauge(
            name="axolotl_train_loss",
            description="Current training loss",
            unit="1",
        )

        self.eval_loss_gauge = self.meter.create_gauge(
            name="axolotl_eval_loss",
            description="Current evaluation loss",
            unit="1",
        )

        self.learning_rate_gauge = self.meter.create_gauge(
            name="axolotl_learning_rate",
            description="Current learning rate",
            unit="1",
        )

        self.epoch_gauge = self.meter.create_gauge(
            name="axolotl_epoch",
            description="Current training epoch",
            unit="1",
        )

        self.global_step_counter = self.meter.create_counter(
            name="axolotl_global_steps",
            description="Total training steps completed",
            unit="1",
        )

        self.grad_norm_gauge = self.meter.create_gauge(
            name="axolotl_gradient_norm",
            description="Gradient norm",
            unit="1",
        )

        self.memory_usage_gauge = self.meter.create_gauge(
            name="axolotl_memory_usage",
            description="Current memory usage in MB",
            unit="MB",
        )

    def _start_metrics_server(self):
        """Start the HTTP server for metrics exposure"""
        if self.server_started:
            return

        try:
            start_http_server(self.metrics_port, addr=self.metrics_host)
            self.server_started = True
            LOG.info(
                f"OpenTelemetry metrics server started on http://{self.metrics_host}:{self.metrics_port}/metrics"
            )

        except Exception as e:
            LOG.error(f"Failed to start OpenTelemetry metrics server: {e}")

    def on_train_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Called at the beginning of training"""
        if not self.metrics_enabled:
            return

        self._start_metrics_server()
        LOG.info("OpenTelemetry metrics collection started")

    def on_log(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        logs: Optional[Dict[str, float]] = None,
        **kwargs,
    ):
        """Called when logging occurs"""
        if not self.metrics_enabled or not logs:
            return

        if "loss" in logs:
            self.train_loss_gauge.set(logs["loss"])

        if "eval_loss" in logs:
            self.eval_loss_gauge.set(logs["eval_loss"])

        if "learning_rate" in logs:
            self.learning_rate_gauge.set(logs["learning_rate"])

        if "epoch" in logs:
            self.epoch_gauge.set(logs["epoch"])

        if "grad_norm" in logs:
            self.grad_norm_gauge.set(logs["grad_norm"])
        if "memory_usage" in logs:
            self.memory_usage_gauge.set(logs["memory_usage"])

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Called at the end of each training step"""
        if not self.metrics_enabled:
            return

        # Update step counter and epoch
        self.global_step_counter.add(1)
        if state.epoch is not None:
            self.epoch_gauge.set(state.epoch)

    def on_evaluate(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        metrics: Optional[Dict[str, float]] = None,
        **kwargs,
    ):
        """Called after evaluation"""
        if not self.metrics_enabled or not metrics:
            return

        if "eval_loss" in metrics:
            self.eval_loss_gauge.set(metrics["eval_loss"])

        # Record any other eval metrics as gauges
        for key, value in metrics.items():
            if key.startswith("eval_") and isinstance(value, (int, float)):
                # Create gauge for this metric if it doesn't exist
                gauge_name = f"axolotl_{key}"
                try:
                    gauge = self.meter.create_gauge(
                        name=gauge_name,
                        description=f"Evaluation metric: {key}",
                        unit="1",
                    )
                    gauge.set(value)
                except Exception as e:
                    LOG.warning(f"Failed to create/update metric {gauge_name}: {e}")

    def on_train_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Called at the end of training"""
        if not self.metrics_enabled:
            return

        LOG.info("Training completed. OpenTelemetry metrics collection finished.")
        LOG.info(
            f"Metrics are still available at http://{self.metrics_host}:{self.metrics_port}/metrics"
        )
@@ -113,7 +113,7 @@ def _map_dataset(

    dataset = dataset.map(
        ds_transform_fn,
        num_proc=cfg.dataset_num_proc,
        num_proc=cfg.dataset_processes,
        load_from_cache_file=not cfg.is_preprocess,
        desc="Mapping RL Dataset",
        **map_kwargs,
@@ -234,7 +234,7 @@ def _load_split(cfg: DictDefault, split: Literal["train", "test"]) -> Dataset:
        prior_len = len(split_datasets[i])
        split_datasets[i] = split_datasets[i].filter(
            drop_long,
            num_proc=cfg.dataset_num_proc,
            num_proc=cfg.dataset_processes,
            load_from_cache_file=not cfg.is_preprocess,
            desc="Dropping Long Sequences",
        )

@@ -239,11 +239,6 @@ def _load_from_local_path(
    return load_dataset(dataset_config.path, **load_dataset_kwargs)
elif local_path.is_file():
    dataset_type = get_dataset_type(dataset_config)

    # For single file datasets, HF always creates only a "train" split
    if dataset_type in ("json", "csv", "text"):
        load_dataset_kwargs["split"] = "train"

    return load_dataset(
        dataset_type,
        data_files=dataset_config.path,
@@ -414,7 +409,7 @@ def save_preprocessed_dataset(
) -> None:
    """Save preprocessed dataset to disk and optionally push to the HF Hub."""
    prepared_ds_path = get_prepared_dataset_path(cfg, dataset_hash)
    num_workers = cfg.dataset_num_proc or get_default_process_count()
    num_workers = cfg.dataset_processes or get_default_process_count()
    if isinstance(dataset, IterableDataset):
        ds_from_iter = Dataset.from_generator(
            functools.partial(_generate_from_iterable_dataset, dataset),

@@ -223,7 +223,7 @@ def handle_long_seq_in_dataset(

    filter_map_kwargs = {}
    if not isinstance(dataset, IterableDataset):
        filter_map_kwargs["num_proc"] = cfg.dataset_num_proc
        filter_map_kwargs["num_proc"] = cfg.dataset_processes
        filter_map_kwargs["load_from_cache_file"] = not cfg.is_preprocess

    drop_long_kwargs = {}

@@ -80,7 +80,7 @@ def get_dataset_wrapper(
    """
    # Common parameters for dataset wrapping
    dataset_kwargs: dict[str, Any] = {
        "process_count": cfg.dataset_num_proc,
        "process_count": cfg.dataset_processes,
        "keep_in_memory": cfg.dataset_keep_in_memory is True,
    }

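Editor's note: these hunks consistently rename the config knob `dataset_num_proc` to `dataset_processes`; the value still flows into the `datasets` library's standard `num_proc` argument. A minimal sketch of the call it parameterizes, assuming only the `datasets` package (`count_chars` and the toy data are illustrative):

```python
from datasets import Dataset

def count_chars(example):
    # dataset_processes (formerly dataset_num_proc) arrives here as num_proc
    return {"n_chars": len(example["text"])}

ds = Dataset.from_dict({"text": ["a", "bb", "ccc", "dddd"]})
ds = ds.map(
    count_chars,
    num_proc=2,                  # parallel worker processes
    desc="Counting characters",  # progress-bar label, as in the hunks above
)
print(ds["n_chars"])  # [1, 2, 3, 4]
```
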
Some files were not shown because too many files have changed in this diff.