Compare commits
92 Commits
fix/issue-
...
uv-first
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffb307a8a7 | ||
|
|
915c258c6e | ||
|
|
1e58235c38 | ||
|
|
5753c5b89c | ||
|
|
18d78f02cf | ||
|
|
923181aaed | ||
|
|
786f1a3ff9 | ||
|
|
26418e6f9a | ||
|
|
19fe84ef46 | ||
|
|
98730868e7 | ||
|
|
5771a65b88 | ||
|
|
f912d1bb97 | ||
|
|
0250e5f87c | ||
|
|
274c579d81 | ||
|
|
ccd2f12335 | ||
|
|
00e0238501 | ||
|
|
f782957002 | ||
|
|
f2f66f2bb9 | ||
|
|
013474eb70 | ||
|
|
6dc9816722 | ||
|
|
74715125b6 | ||
|
|
f0f3bfbdf0 | ||
|
|
022ef7ab4e | ||
|
|
04533b79d4 | ||
|
|
19de29be19 | ||
|
|
ec75aa5889 | ||
|
|
cf4e3fac64 | ||
|
|
69df309cbb | ||
|
|
b436ecf61f | ||
|
|
f137ce50ec | ||
|
|
4131bcf769 | ||
|
|
64fea39978 | ||
|
|
4966496b98 | ||
|
|
66a9e4fced | ||
|
|
15d35b76bb | ||
|
|
0d53e0fe8f | ||
|
|
9344fa5e8c | ||
|
|
c702edae5f | ||
|
|
dfaf76659f | ||
|
|
26a58bb8af | ||
|
|
cec2490903 | ||
|
|
dfa5224908 | ||
|
|
ddafc6ef80 | ||
|
|
ad56e600e3 | ||
|
|
18d9456297 | ||
|
|
da5ede6372 | ||
|
|
6cbca1ffb2 | ||
|
|
2e082d47cc | ||
|
|
b4c6675cd2 | ||
|
|
828131332a | ||
|
|
273a03f85c | ||
|
|
9bbe2cfe0f | ||
|
|
64da8f0044 | ||
|
|
1fa0a98e38 | ||
|
|
8d542d9d63 | ||
|
|
a4565476e0 | ||
|
|
02dc263338 | ||
|
|
2acd3e1242 | ||
|
|
0437c1a4ba | ||
|
|
ef150fd973 | ||
|
|
47ad92c6b9 | ||
|
|
f0fee9c56c | ||
|
|
37d07bd7f7 | ||
|
|
4c81172917 | ||
|
|
cd8c769e84 | ||
|
|
0d60046d08 | ||
|
|
c110e3eb48 | ||
|
|
95c259b3fb | ||
|
|
d1fd505813 | ||
|
|
1334281d50 | ||
|
|
98f230d864 | ||
|
|
02f308351c | ||
|
|
3b91e8174d | ||
|
|
40d906fb33 | ||
|
|
89d5323c13 | ||
|
|
df870f6a8f | ||
|
|
f500aaa490 | ||
|
|
9ec33f52e3 | ||
|
|
b453562c01 | ||
|
|
367f7eb3a6 | ||
|
|
e888e38ce7 | ||
|
|
400120af2d | ||
|
|
459e5f9b16 | ||
|
|
43f6f84269 | ||
|
|
36c4ab11f9 | ||
|
|
2f4e4ef604 | ||
|
|
aee03fc636 | ||
|
|
255b818fbc | ||
|
|
332ee74f32 | ||
|
|
3b0d2ac5c0 | ||
|
|
9462a1bf79 | ||
|
|
8e9386c799 |
@@ -2,7 +2,6 @@
|
|||||||
source = axolotl
|
source = axolotl
|
||||||
omit =
|
omit =
|
||||||
*/tests/*
|
*/tests/*
|
||||||
setup.py
|
|
||||||
|
|
||||||
[report]
|
[report]
|
||||||
exclude_lines =
|
exclude_lines =
|
||||||
|
|||||||
17
.github/CONTRIBUTING.md
vendored
17
.github/CONTRIBUTING.md
vendored
@@ -29,13 +29,18 @@ PRs are **greatly welcome**!
|
|||||||
2. Set up the development environment by following the instructions in the [README.md](https://github.com/axolotl-ai-cloud/axolotl/tree/main/README.md) file.
|
2. Set up the development environment by following the instructions in the [README.md](https://github.com/axolotl-ai-cloud/axolotl/tree/main/README.md) file.
|
||||||
3. Explore the codebase, run tests, and verify that everything works as expected.
|
3. Explore the codebase, run tests, and verify that everything works as expected.
|
||||||
|
|
||||||
Please run below to setup env
|
Please run the below to setup:
|
||||||
```bash
|
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
pre-commit install
|
|
||||||
|
|
||||||
# test
|
```bash
|
||||||
pytest tests/
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
|
cd axolotl
|
||||||
|
|
||||||
|
uv sync --dev && uv pip install flash-attn --no-build-isolation
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
pre-commit install # install pre-commit hooks
|
||||||
|
|
||||||
|
pytest tests/ # optional; run test suite
|
||||||
```
|
```
|
||||||
|
|
||||||
## How to Contribute
|
## How to Contribute
|
||||||
|
|||||||
11
.github/workflows/base.yml
vendored
11
.github/workflows/base.yml
vendored
@@ -39,13 +39,6 @@ jobs:
|
|||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
dockerfile: "Dockerfile-base"
|
dockerfile: "Dockerfile-base"
|
||||||
- cuda: "126"
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
cudnn_version: ""
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
|
||||||
dockerfile: "Dockerfile-base"
|
|
||||||
- cuda: "126"
|
- cuda: "126"
|
||||||
cuda_version: 12.6.3
|
cuda_version: 12.6.3
|
||||||
cudnn_version: ""
|
cudnn_version: ""
|
||||||
@@ -105,7 +98,9 @@ jobs:
|
|||||||
context: .
|
context: .
|
||||||
file: ./docker/${{ matrix.dockerfile }}
|
file: ./docker/${{ matrix.dockerfile }}
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
tags: |
|
||||||
|
${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
|
${{ steps.metadata.outputs.tags }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
labels: ${{ steps.metadata.outputs.labels }}
|
labels: ${{ steps.metadata.outputs.labels }}
|
||||||
build-args: |
|
build-args: |
|
||||||
CUDA_VERSION=${{ matrix.cuda_version }}
|
CUDA_VERSION=${{ matrix.cuda_version }}
|
||||||
|
|||||||
8
.github/workflows/docs.yml
vendored
8
.github/workflows/docs.yml
vendored
@@ -20,10 +20,14 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python3 -m pip install jupyter quartodoc
|
uv pip install --system jupyter quartodoc
|
||||||
python3 -m pip install -e .
|
uv pip install --system -e .
|
||||||
- name: Build autodoc
|
- name: Build autodoc
|
||||||
run: quartodoc build
|
run: quartodoc build
|
||||||
- name: Publish to GitHub Pages (and render)
|
- name: Publish to GitHub Pages (and render)
|
||||||
|
|||||||
3
.github/workflows/lint.yml
vendored
3
.github/workflows/lint.yml
vendored
@@ -6,7 +6,7 @@ on:
|
|||||||
types: [opened, synchronize, reopened, ready_for_review]
|
types: [opened, synchronize, reopened, ready_for_review]
|
||||||
paths:
|
paths:
|
||||||
- '**.py'
|
- '**.py'
|
||||||
- 'requirements.txt'
|
- 'pyproject.toml'
|
||||||
- '.github/workflows/*.yml'
|
- '.github/workflows/*.yml'
|
||||||
- "*.[q]md"
|
- "*.[q]md"
|
||||||
- "examples/**/*.y[a]?ml"
|
- "examples/**/*.y[a]?ml"
|
||||||
@@ -23,5 +23,4 @@ jobs:
|
|||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
- uses: pre-commit/action@v3.0.1
|
- uses: pre-commit/action@v3.0.1
|
||||||
|
|||||||
16
.github/workflows/main.yml
vendored
16
.github/workflows/main.yml
vendored
@@ -20,11 +20,6 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
axolotl_extras:
|
|
||||||
- cuda: 126
|
- cuda: 126
|
||||||
cuda_version: 12.6.3
|
cuda_version: 12.6.3
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
@@ -73,6 +68,8 @@ jobs:
|
|||||||
PYTORCH_VERSION=${{ matrix.pytorch }}
|
PYTORCH_VERSION=${{ matrix.pytorch }}
|
||||||
AXOLOTL_ARGS=${{ matrix.axolotl_args }}
|
AXOLOTL_ARGS=${{ matrix.axolotl_args }}
|
||||||
AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}
|
AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}
|
||||||
|
GIT_REF=${{ github.ref }}
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
file: ./docker/Dockerfile
|
file: ./docker/Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
@@ -93,11 +90,6 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
axolotl_extras:
|
|
||||||
- cuda: 126
|
- cuda: 126
|
||||||
cuda_version: 12.6.3
|
cuda_version: 12.6.3
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
@@ -148,6 +140,8 @@ jobs:
|
|||||||
build-args: |
|
build-args: |
|
||||||
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
CUDA=${{ matrix.cuda }}
|
CUDA=${{ matrix.cuda }}
|
||||||
|
GIT_REF=${{ github.ref }}
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
file: ./docker/Dockerfile-cloud
|
file: ./docker/Dockerfile-cloud
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
@@ -213,6 +207,8 @@ jobs:
|
|||||||
build-args: |
|
build-args: |
|
||||||
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
BASE_TAG=${{ github.ref_type == 'tag' && 'main' || github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
CUDA=${{ matrix.cuda }}
|
CUDA=${{ matrix.cuda }}
|
||||||
|
GIT_REF=${{ github.ref }}
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
file: ./docker/Dockerfile-cloud-no-tmux
|
file: ./docker/Dockerfile-cloud-no-tmux
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
|
|||||||
12
.github/workflows/multi-gpu-e2e.yml
vendored
12
.github/workflows/multi-gpu-e2e.yml
vendored
@@ -4,8 +4,6 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- 'tests/e2e/multigpu/**.py'
|
- 'tests/e2e/multigpu/**.py'
|
||||||
- 'requirements.txt'
|
|
||||||
- 'setup.py'
|
|
||||||
- 'pyproject.toml'
|
- 'pyproject.toml'
|
||||||
- '.github/workflows/multi-gpu-e2e.yml'
|
- '.github/workflows/multi-gpu-e2e.yml'
|
||||||
- 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
|
- 'src/axolotl/core/trainers/mixins/sequence_parallel.py'
|
||||||
@@ -56,13 +54,17 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==1.0.2 jinja2
|
pip install modal==1.0.2 jinja2 protobuf
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
@@ -72,4 +74,4 @@ jobs:
|
|||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.multigpu
|
modal run -m cicd.multigpu
|
||||||
|
|||||||
4
.github/workflows/nightlies.yml
vendored
4
.github/workflows/nightlies.yml
vendored
@@ -52,6 +52,8 @@ jobs:
|
|||||||
CUDA=${{ matrix.cuda }}
|
CUDA=${{ matrix.cuda }}
|
||||||
PYTORCH_VERSION=${{ matrix.pytorch }}
|
PYTORCH_VERSION=${{ matrix.pytorch }}
|
||||||
AXOLOTL_ARGS=${{ matrix.axolotl_args }}
|
AXOLOTL_ARGS=${{ matrix.axolotl_args }}
|
||||||
|
GIT_REF=${{ github.ref }}
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
file: ./docker/Dockerfile
|
file: ./docker/Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
@@ -102,6 +104,8 @@ jobs:
|
|||||||
build-args: |
|
build-args: |
|
||||||
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
CUDA=${{ matrix.cuda }}
|
CUDA=${{ matrix.cuda }}
|
||||||
|
GIT_REF=${{ github.ref }}
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
file: ./docker/Dockerfile-cloud
|
file: ./docker/Dockerfile-cloud
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
|
|||||||
7
.github/workflows/precommit-autoupdate.yml
vendored
7
.github/workflows/precommit-autoupdate.yml
vendored
@@ -18,10 +18,15 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
|
|
||||||
- name: Update pre-commit hooks
|
- name: Update pre-commit hooks
|
||||||
id: update
|
id: update
|
||||||
run: |
|
run: |
|
||||||
pip install pre-commit
|
uv pip install --system pre-commit
|
||||||
pre-commit autoupdate
|
pre-commit autoupdate
|
||||||
if [[ -n $(git status --porcelain) ]]; then
|
if [[ -n $(git status --porcelain) ]]; then
|
||||||
echo "changes=true" >> $GITHUB_OUTPUT
|
echo "changes=true" >> $GITHUB_OUTPUT
|
||||||
|
|||||||
9
.github/workflows/preview-docs.yml
vendored
9
.github/workflows/preview-docs.yml
vendored
@@ -40,10 +40,15 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python3 -m pip install jupyter quartodoc
|
uv pip install --system jupyter quartodoc
|
||||||
python3 -m pip install -e .
|
uv pip install --system -e .
|
||||||
|
|
||||||
- name: Build autodoc
|
- name: Build autodoc
|
||||||
run: quartodoc build
|
run: quartodoc build
|
||||||
|
|||||||
21
.github/workflows/pypi.yml
vendored
21
.github/workflows/pypi.yml
vendored
@@ -38,23 +38,24 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip3 install wheel packaging==23.2
|
uv pip install --system wheel packaging==23.2
|
||||||
pip3 install --no-build-isolation -e .
|
uv pip install --system --no-build-isolation -e ".[dev]"
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
|
|
||||||
- name: Extract tag name
|
- name: Extract tag name
|
||||||
id: tag
|
id: tag
|
||||||
run: echo ::set-output name=TAG_NAME::$(echo $GITHUB_REF | cut -d / -f 3)
|
run: echo "TAG_NAME=$(echo "$GITHUB_REF" | cut -d / -f 3)" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Update version in setup.py
|
- name: Build package
|
||||||
run: |
|
run: |
|
||||||
sed -i -E 's/version="([0-9.]+)",/version="${{ steps.tag.outputs.TAG_NAME }}",/g' setup.py
|
uv pip install --system build
|
||||||
|
python -m build
|
||||||
- name: Build a source dist
|
|
||||||
run: |
|
|
||||||
python setup.py sdist
|
|
||||||
|
|
||||||
- name: Publish package distributions to PyPI
|
- name: Publish package distributions to PyPI
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
|
|||||||
52
.github/workflows/tests-nightly.yml
vendored
52
.github/workflows/tests-nightly.yml
vendored
@@ -13,7 +13,6 @@ jobs:
|
|||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
- uses: pre-commit/action@v3.0.1
|
- uses: pre-commit/action@v3.0.1
|
||||||
env:
|
env:
|
||||||
SKIP: no-commit-to-branch
|
SKIP: no-commit-to-branch
|
||||||
@@ -43,32 +42,30 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python_version }}
|
python-version: ${{ matrix.python_version }}
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
|
|
||||||
- name: upgrade pip
|
- name: Install uv
|
||||||
run: |
|
uses: astral-sh/setup-uv@v4
|
||||||
pip3 install --upgrade pip
|
with:
|
||||||
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
|
version: "latest"
|
||||||
|
|
||||||
- name: Install PyTorch
|
- name: Install PyTorch
|
||||||
run: |
|
run: |
|
||||||
pip3 install torch==${{ matrix.pytorch_version }} torchvision
|
uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
|
||||||
|
|
||||||
- name: Update requirements.txt
|
- name: Update pyproject.toml for nightly builds
|
||||||
run: |
|
run: |
|
||||||
sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt
|
sed -i 's#"transformers==.*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml
|
||||||
sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt
|
sed -i 's#"peft==.*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml
|
||||||
sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt
|
sed -i 's#"accelerate==.*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml
|
||||||
sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt
|
sed -i 's#"trl==.*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml
|
||||||
sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt
|
sed -i 's#"datasets==.*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip3 show torch
|
uv pip show --system torch
|
||||||
pip3 install --no-build-isolation -U -e .
|
uv pip install --system --no-build-isolation -e ".[dev]"
|
||||||
python scripts/unsloth_install.py | sh
|
python scripts/unsloth_install.py | sh
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
|
|
||||||
- name: Make sure PyTorch version wasn't clobbered
|
- name: Make sure PyTorch version wasn't clobbered
|
||||||
run: |
|
run: |
|
||||||
@@ -84,9 +81,6 @@ jobs:
|
|||||||
pytest -v --durations=10 tests/patched/
|
pytest -v --durations=10 tests/patched/
|
||||||
pytest -v --durations=10 tests/cli/
|
pytest -v --durations=10 tests/cli/
|
||||||
|
|
||||||
- name: cleanup pip cache
|
|
||||||
run: |
|
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
|
||||||
|
|
||||||
docker-e2e-tests:
|
docker-e2e-tests:
|
||||||
if: github.repository_owner == 'axolotl-ai-cloud'
|
if: github.repository_owner == 'axolotl-ai-cloud'
|
||||||
@@ -120,13 +114,16 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
uv pip install --system modal==1.0.2 jinja2
|
||||||
pip install modal==1.0.2 jinja2
|
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
@@ -136,7 +133,7 @@ jobs:
|
|||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.e2e_tests
|
modal run -m cicd.e2e_tests
|
||||||
docker-e2e-multigpu-tests:
|
docker-e2e-multigpu-tests:
|
||||||
if: github.repository_owner == 'axolotl-ai-cloud'
|
if: github.repository_owner == 'axolotl-ai-cloud'
|
||||||
# this job needs to be run on self-hosted GPU runners...
|
# this job needs to be run on self-hosted GPU runners...
|
||||||
@@ -162,13 +159,16 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
uv pip install --system modal==1.0.2 jinja2
|
||||||
pip install modal==1.0.2 jinja2
|
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
|
|||||||
98
.github/workflows/tests.yml
vendored
98
.github/workflows/tests.yml
vendored
@@ -7,18 +7,16 @@ on:
|
|||||||
- "main"
|
- "main"
|
||||||
paths:
|
paths:
|
||||||
- '**.py'
|
- '**.py'
|
||||||
- 'requirements.txt'
|
- 'pyproject.toml'
|
||||||
- '.github/workflows/*.yml'
|
- '.github/workflows/*.yml'
|
||||||
- 'requirements-tests.txt'
|
|
||||||
- 'cicd/cicd.sh'
|
- 'cicd/cicd.sh'
|
||||||
- 'cicd/Dockerfile.jinja'
|
- 'cicd/Dockerfile.jinja'
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened, ready_for_review]
|
types: [opened, synchronize, reopened, ready_for_review]
|
||||||
paths:
|
paths:
|
||||||
- '**.py'
|
- '**.py'
|
||||||
- 'requirements.txt'
|
- 'pyproject.toml'
|
||||||
- '.github/workflows/*.yml'
|
- '.github/workflows/*.yml'
|
||||||
- 'requirements-tests.txt'
|
|
||||||
- 'cicd/cicd.sh'
|
- 'cicd/cicd.sh'
|
||||||
- 'cicd/Dockerfile.jinja'
|
- 'cicd/Dockerfile.jinja'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
@@ -41,7 +39,6 @@ jobs:
|
|||||||
- uses: actions/setup-python@v5
|
- uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
- uses: pre-commit/action@v3.0.1
|
- uses: pre-commit/action@v3.0.1
|
||||||
env:
|
env:
|
||||||
SKIP: no-commit-to-branch
|
SKIP: no-commit-to-branch
|
||||||
@@ -72,24 +69,25 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python_version }}
|
python-version: ${{ matrix.python_version }}
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
|
|
||||||
- name: upgrade pip
|
- name: Install uv
|
||||||
run: |
|
uses: astral-sh/setup-uv@v4
|
||||||
pip3 install --upgrade pip
|
with:
|
||||||
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
|
version: "latest"
|
||||||
|
|
||||||
- name: Install PyTorch
|
- name: Install PyTorch
|
||||||
run: |
|
run: |
|
||||||
pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision
|
uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip3 show torch
|
uv pip show --system torch
|
||||||
pip3 install --no-cache-dir --no-build-isolation -U -e .
|
uv pip install --system wheel
|
||||||
python scripts/unsloth_install.py | sh
|
printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
uv pip install --system --no-cache-dir --no-build-isolation -e ".[dev]" --constraints torch-constraints.txt
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
set -o pipefail
|
||||||
|
python scripts/unsloth_install.py | bash
|
||||||
|
python scripts/cutcrossentropy_install.py | bash
|
||||||
|
|
||||||
- name: Make sure PyTorch version wasn't clobbered
|
- name: Make sure PyTorch version wasn't clobbered
|
||||||
run: |
|
run: |
|
||||||
@@ -105,10 +103,10 @@ jobs:
|
|||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: |
|
run: |
|
||||||
pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
|
||||||
pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
|
||||||
pytest -v --durations=10 tests/patched/ --cov=axolotl --cov-append --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/patched/
|
||||||
pytest -v --durations=10 tests/cli/ --cov=axolotl --cov-append --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/cli/
|
||||||
|
|
||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5
|
||||||
@@ -118,9 +116,6 @@ jobs:
|
|||||||
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
||||||
fail_ci_if_error: false
|
fail_ci_if_error: false
|
||||||
|
|
||||||
- name: cleanup pip cache
|
|
||||||
run: |
|
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
|
||||||
|
|
||||||
pytest-sdist:
|
pytest-sdist:
|
||||||
name: PyTest from Source Dist
|
name: PyTest from Source Dist
|
||||||
@@ -147,25 +142,26 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python_version }}
|
python-version: ${{ matrix.python_version }}
|
||||||
cache: 'pip' # caching pip dependencies
|
|
||||||
|
|
||||||
- name: upgrade pip
|
- name: Install uv
|
||||||
run: |
|
uses: astral-sh/setup-uv@v4
|
||||||
pip3 install --upgrade pip
|
with:
|
||||||
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel
|
version: "latest"
|
||||||
|
|
||||||
- name: Install PyTorch
|
- name: Install PyTorch
|
||||||
run: |
|
run: |
|
||||||
pip3 install --no-cache-dir torch==${{ matrix.pytorch_version }} torchvision
|
uv pip install --system torch==${{ matrix.pytorch_version }} torchvision
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
pip3 show torch
|
uv pip show --system torch
|
||||||
python -m build --no-isolation --sdist
|
uv pip install --system wheel build setuptools_scm
|
||||||
pip3 install --no-cache-dir --no-build-isolation dist/axolotl*.tar.gz
|
python -m build --sdist
|
||||||
|
printf "torch==${{ matrix.pytorch_version }}\n" > torch-constraints.txt
|
||||||
|
tarball_path=$(echo dist/axolotl*.tar.gz)
|
||||||
|
uv pip install --no-cache-dir --no-build-isolation --system "${tarball_path}[dev]" --constraints torch-constraints.txt
|
||||||
python scripts/unsloth_install.py | sh
|
python scripts/unsloth_install.py | sh
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
|
|
||||||
- name: Make sure PyTorch version wasn't clobbered
|
- name: Make sure PyTorch version wasn't clobbered
|
||||||
run: |
|
run: |
|
||||||
@@ -180,13 +176,9 @@ jobs:
|
|||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: |
|
run: |
|
||||||
pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/ --cov=axolotl --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --dist loadfile --cov=axolotl --cov-report=xml --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ --ignore=tests/monkeypatch/ tests/
|
||||||
pytest -v --durations=10 tests/monkeypatch/ --cov=axolotl --cov-append --cov-report=xml
|
python -m pytest -v --durations=10 -n 8 --cov=axolotl --cov-append --cov-report=xml tests/monkeypatch/
|
||||||
pytest -v --durations=10 tests/cli/
|
python -m pytest -v --durations=10 -n 8 tests/cli/
|
||||||
|
|
||||||
- name: cleanup pip cache
|
|
||||||
run: |
|
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
|
||||||
|
|
||||||
gate-skip-e2e:
|
gate-skip-e2e:
|
||||||
needs: [pre-commit, pytest, pytest-sdist]
|
needs: [pre-commit, pytest, pytest-sdist]
|
||||||
@@ -243,7 +235,7 @@ jobs:
|
|||||||
pytorch: 2.7.1
|
pytorch: 2.7.1
|
||||||
num_gpus: 1
|
num_gpus: 1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
dockerfile: "Dockerfile-uv.jinja"
|
dockerfile: "Dockerfile.jinja"
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -251,13 +243,17 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==1.0.2 jinja2
|
pip install modal==1.0.2 jinja2 protobuf
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
@@ -312,13 +308,17 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==1.0.2 jinja2
|
pip install modal==1.0.2 jinja2 protobuf
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
@@ -355,13 +355,17 @@ jobs:
|
|||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: "3.11"
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v4
|
||||||
|
with:
|
||||||
|
version: "latest"
|
||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==1.0.2 jinja2
|
pip install modal==1.0.2 jinja2 protobuf
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=${{ github.ref_name }}-base-uv-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -191,5 +191,5 @@ out/
|
|||||||
# vim
|
# vim
|
||||||
*.swp
|
*.swp
|
||||||
|
|
||||||
# scm auto-versioning
|
# setuptools-scm generated version file
|
||||||
src/axolotl/_version.py
|
src/axolotl/_version.py
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
FROM axolotlai/axolotl-cloud:main-py3.11-cu124-2.6.0
|
FROM axolotlai/axolotl-cloud:main-py3.11-cu124-2.6.0
|
||||||
|
|
||||||
COPY .runpod/requirements.txt /requirements.txt
|
COPY .runpod/requirements.txt /requirements.txt
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
||||||
python3 -m pip install --upgrade pip && \
|
/root/.local/bin/uv pip install --system -r /requirements.txt
|
||||||
python3 -m pip install --upgrade -r /requirements.txt
|
|
||||||
|
|
||||||
# Environment settings
|
# Environment settings
|
||||||
ARG BASE_VOLUME="/runpod-volume"
|
ARG BASE_VOLUME="/runpod-volume"
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
include requirements.txt
|
include pyproject.toml
|
||||||
include README.md
|
include README.md
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include src/setuptools_axolotl_dynamic_dependencies.py
|
|
||||||
include src/axolotl/utils/chat_templates/templates/*.jinja
|
include src/axolotl/utils/chat_templates/templates/*.jinja
|
||||||
recursive-include axolotl *.py
|
recursive-include src/axolotl *.py
|
||||||
|
|||||||
36
README.md
36
README.md
@@ -65,15 +65,9 @@ Features:
|
|||||||
- **Flexible Dataset Handling**: Load from local, HuggingFace, and cloud (S3, Azure, GCP, OCI) datasets.
|
- **Flexible Dataset Handling**: Load from local, HuggingFace, and cloud (S3, Azure, GCP, OCI) datasets.
|
||||||
- **Cloud Ready**: We ship [Docker images](https://hub.docker.com/u/axolotlai) and also [PyPI packages](https://pypi.org/project/axolotl/) for use on cloud platforms and local hardware.
|
- **Cloud Ready**: We ship [Docker images](https://hub.docker.com/u/axolotlai) and also [PyPI packages](https://pypi.org/project/axolotl/) for use on cloud platforms and local hardware.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## 🚀 Quick Start - LLM Fine-tuning in Minutes
|
## 🚀 Quick Start - LLM Fine-tuning in Minutes
|
||||||
|
|
||||||
**Requirements**:
|
**Requirements**: NVIDIA GPU (Ampere+) or AMD GPU, Python 3.11+
|
||||||
|
|
||||||
- NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
|
|
||||||
- Python 3.11
|
|
||||||
- PyTorch ≥2.6.0
|
|
||||||
|
|
||||||
### Google Colab
|
### Google Colab
|
||||||
|
|
||||||
@@ -81,15 +75,35 @@ Features:
|
|||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
|
|
||||||
#### Using pip
|
#### Project setup (uv add)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Install uv
|
||||||
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
|
||||||
|
# Initialize or enter your project
|
||||||
|
uv init my-project && cd my-project
|
||||||
|
uv add axolotl
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
# Download example axolotl configs, deepspeed configs
|
# Download example axolotl configs, deepspeed configs
|
||||||
axolotl fetch examples
|
axolotl fetch examples
|
||||||
axolotl fetch deepspeed_configs # OPTIONAL
|
axolotl fetch deepspeed_configs # optional
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Quick try (uv pip)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install uv if needed
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
|
||||||
|
uv pip install axolotl
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Download example axolotl configs, deepspeed configs
|
||||||
|
axolotl fetch examples
|
||||||
|
axolotl fetch deepspeed_configs # optional
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Using Docker
|
#### Using Docker
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}
|
|
||||||
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
|
|
||||||
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
|
||||||
ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
|
|
||||||
ENV CUDA="{{ CUDA }}"
|
|
||||||
ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
|
|
||||||
ENV GITHUB_REF="{{ GITHUB_REF }}"
|
|
||||||
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
|
||||||
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
|
||||||
ENV HF_HOME="{{ HF_HOME }}"
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
|
|
||||||
|
|
||||||
WORKDIR /workspace
|
|
||||||
|
|
||||||
RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git
|
|
||||||
|
|
||||||
WORKDIR /workspace/axolotl
|
|
||||||
|
|
||||||
RUN git fetch origin +$GITHUB_REF && \
|
|
||||||
git checkout FETCH_HEAD
|
|
||||||
|
|
||||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
|
||||||
RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
|
|
||||||
sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
|
|
||||||
sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
|
|
||||||
sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
|
|
||||||
sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
|
|
||||||
sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN uv pip install packaging==23.2 setuptools==75.8.0
|
|
||||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
|
||||||
uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
|
||||||
else \
|
|
||||||
uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN python scripts/unsloth_install.py --uv | sh
|
|
||||||
RUN python scripts/cutcrossentropy_install.py --uv | sh
|
|
||||||
|
|
||||||
# So we can test the Docker image
|
|
||||||
RUN uv pip install -r requirements-dev.txt -r requirements-tests.txt
|
|
||||||
|
|
||||||
# fix so that git fetch/pull from remote works
|
|
||||||
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
|
||||||
git config --get remote.origin.fetch
|
|
||||||
|
|
||||||
# helper for huggingface-login cli
|
|
||||||
RUN git config --global credential.helper store
|
|
||||||
@@ -1,6 +1,10 @@
|
|||||||
FROM axolotlai/axolotl-base:{{ BASE_TAG }}
|
FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }}
|
||||||
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
SHELL ["/bin/bash", "-euxo", "pipefail", "-c"]
|
||||||
|
|
||||||
|
ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"
|
||||||
|
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
|
||||||
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
||||||
ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
|
ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
|
||||||
ENV CUDA="{{ CUDA }}"
|
ENV CUDA="{{ CUDA }}"
|
||||||
@@ -9,7 +13,7 @@ ENV GITHUB_REF="{{ GITHUB_REF }}"
|
|||||||
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
||||||
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
||||||
ENV HF_HOME="{{ HF_HOME }}"
|
ENV HF_HOME="{{ HF_HOME }}"
|
||||||
ENV AXOLOTL_DATASET_PROCESSES="8"
|
ENV VENV_PYTHON=$VENV_PYTHON
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
|
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm
|
||||||
@@ -25,25 +29,27 @@ RUN git fetch origin +$GITHUB_REF && \
|
|||||||
|
|
||||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||||
RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
|
RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
|
||||||
sed -i 's#^transformers.*#transformers @ git+https://github.com/huggingface/transformers.git@main#' requirements.txt; \
|
sed -i 's#"transformers[^"]*"#"transformers @ git+https://github.com/huggingface/transformers.git@main"#' pyproject.toml; \
|
||||||
sed -i 's#^peft.*#peft @ git+https://github.com/huggingface/peft.git@main#' requirements.txt; \
|
sed -i 's#"peft[^"]*"#"peft @ git+https://github.com/huggingface/peft.git@main"#' pyproject.toml; \
|
||||||
sed -i 's#^accelerate.*#accelerate @ git+https://github.com/huggingface/accelerate.git@main#' requirements.txt; \
|
sed -i 's#"accelerate[^"]*"#"accelerate @ git+https://github.com/huggingface/accelerate.git@main"#' pyproject.toml; \
|
||||||
sed -i 's#^trl.*#trl @ git+https://github.com/huggingface/trl.git@main#' requirements.txt; \
|
sed -i 's#"trl[^"]*"#"trl @ git+https://github.com/huggingface/trl.git@main"#' pyproject.toml; \
|
||||||
sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
|
sed -i 's#"datasets[^"]*"#"datasets @ git+https://github.com/huggingface/datasets.git@main"#' pyproject.toml; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN pip install packaging==23.2 setuptools==75.8.0
|
RUN uv pip install --python "$VENV_PYTHON" packaging==23.2 setuptools==75.8.0 pip
|
||||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,${AXOLOTL_EXTRAS}] $AXOLOTL_ARGS; \
|
||||||
else \
|
else \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
|
uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
RUN python scripts/unsloth_install.py | sh
|
RUN uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS
|
||||||
RUN python scripts/cutcrossentropy_install.py | sh
|
|
||||||
|
RUN "$VENV_PYTHON" scripts/unsloth_install.py | sh
|
||||||
|
RUN "$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh
|
||||||
|
|
||||||
# So we can test the Docker image
|
# So we can test the Docker image
|
||||||
RUN pip install -r requirements-dev.txt -r requirements-tests.txt
|
RUN uv pip install --python "$VENV_PYTHON" -e ".[dev]"
|
||||||
|
|
||||||
# fix so that git fetch/pull from remote works
|
# fix so that git fetch/pull from remote works
|
||||||
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
||||||
|
|||||||
16
cicd/cicd.sh
16
cicd/cicd.sh
@@ -4,7 +4,7 @@ set -e
|
|||||||
python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
|
python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
|
||||||
|
|
||||||
# Run unit tests with initial coverage report
|
# Run unit tests with initial coverage report
|
||||||
pytest -v --durations=10 -n8 \
|
uv run pytest -v --durations=10 -n8 \
|
||||||
--ignore=tests/e2e/ \
|
--ignore=tests/e2e/ \
|
||||||
--ignore=tests/patched/ \
|
--ignore=tests/patched/ \
|
||||||
--ignore=tests/cli \
|
--ignore=tests/cli \
|
||||||
@@ -12,36 +12,36 @@ pytest -v --durations=10 -n8 \
|
|||||||
--cov=axolotl
|
--cov=axolotl
|
||||||
|
|
||||||
# Run lora kernels tests with coverage append
|
# Run lora kernels tests with coverage append
|
||||||
pytest -v --durations=10 \
|
uv run pytest -v --durations=10 \
|
||||||
/workspace/axolotl/tests/e2e/patched/lora_kernels \
|
/workspace/axolotl/tests/e2e/patched/lora_kernels \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-append
|
||||||
|
|
||||||
# Run patched tests excluding lora kernels with coverage append
|
# Run patched tests excluding lora kernels with coverage append
|
||||||
pytest --full-trace -vvv --durations=10 \
|
uv run pytest --full-trace -vvv --durations=10 \
|
||||||
--ignore=tests/e2e/patched/lora_kernels \
|
--ignore=tests/e2e/patched/lora_kernels \
|
||||||
/workspace/axolotl/tests/e2e/patched \
|
/workspace/axolotl/tests/e2e/patched \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-append
|
||||||
|
|
||||||
# Run solo tests with coverage append
|
# Run solo tests with coverage append
|
||||||
pytest -v --durations=10 -n1 \
|
uv run pytest -v --durations=10 -n1 \
|
||||||
/workspace/axolotl/tests/e2e/solo/ \
|
/workspace/axolotl/tests/e2e/solo/ \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-append
|
||||||
|
|
||||||
# Run integration tests with coverage append
|
# Run integration tests with coverage append
|
||||||
pytest -v --durations=10 \
|
uv run pytest -v --durations=10 \
|
||||||
/workspace/axolotl/tests/e2e/integrations/ \
|
/workspace/axolotl/tests/e2e/integrations/ \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-append
|
||||||
|
|
||||||
pytest -v --durations=10 /workspace/axolotl/tests/cli \
|
uv run pytest -v --durations=10 /workspace/axolotl/tests/cli \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-append
|
||||||
|
|
||||||
# Run remaining e2e tests with coverage append and final report
|
# Run remaining e2e tests with coverage append and final report
|
||||||
pytest -v --durations=10 \
|
uv run pytest -v --durations=10 \
|
||||||
--ignore=tests/e2e/solo/ \
|
--ignore=tests/e2e/solo/ \
|
||||||
--ignore=tests/e2e/patched/ \
|
--ignore=tests/e2e/patched/ \
|
||||||
--ignore=tests/e2e/multigpu/ \
|
--ignore=tests/e2e/multigpu/ \
|
||||||
@@ -52,4 +52,4 @@ pytest -v --durations=10 \
|
|||||||
--cov-append \
|
--cov-append \
|
||||||
--cov-report=xml:e2e-coverage.xml
|
--cov-report=xml:e2e-coverage.xml
|
||||||
|
|
||||||
codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
|
uv run codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION} || true
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ df_args = {
|
|||||||
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
||||||
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
||||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
||||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
|
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
|
||||||
"CUDA": os.environ.get("CUDA", "126"),
|
"CUDA": os.environ.get("CUDA", "126"),
|
||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ df_args = {
|
|||||||
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
||||||
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
||||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
||||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
|
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-uv-py3.11-cu126-2.6.0"),
|
||||||
"CUDA": os.environ.get("CUDA", "126"),
|
"CUDA": os.environ.get("CUDA", "126"),
|
||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
|
|||||||
@@ -1,13 +1,19 @@
|
|||||||
ARG BASE_TAG=main-base
|
ARG BASE_TAG=main-base-uv
|
||||||
FROM axolotlai/axolotl-base:$BASE_TAG
|
FROM axolotlai/axolotl-base-uv:$BASE_TAG
|
||||||
|
|
||||||
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||||
ARG AXOLOTL_EXTRAS=""
|
ARG AXOLOTL_EXTRAS=""
|
||||||
ARG AXOLOTL_ARGS=""
|
ARG AXOLOTL_ARGS=""
|
||||||
ARG CUDA="118"
|
ARG CUDA="118"
|
||||||
ARG PYTORCH_VERSION="2.1.2"
|
ARG PYTORCH_VERSION="2.1.2"
|
||||||
|
ARG GIT_REF="refs/heads/main"
|
||||||
|
ARG GIT_SHA="HEAD"
|
||||||
|
ARG VENV_PYTHON="/workspace/axolotl-venv/bin/python"
|
||||||
|
|
||||||
ENV PYTORCH_VERSION=$PYTORCH_VERSION
|
ENV PYTORCH_VERSION=$PYTORCH_VERSION
|
||||||
|
ENV GIT_REF=$GIT_REF
|
||||||
|
ENV GIT_SHA=$GIT_SHA
|
||||||
|
ENV VENV_PYTHON=$VENV_PYTHON
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev rsync s3fs && \
|
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev rsync s3fs && \
|
||||||
@@ -20,16 +26,19 @@ RUN git clone --depth=1 https://github.com/axolotl-ai-cloud/axolotl.git
|
|||||||
|
|
||||||
WORKDIR /workspace/axolotl
|
WORKDIR /workspace/axolotl
|
||||||
|
|
||||||
|
# Ensure we are on the expected commit and break Docker cache between revisions
|
||||||
|
RUN git fetch origin "$GIT_REF" && git checkout "$GIT_SHA"
|
||||||
|
|
||||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
||||||
else \
|
else \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
|
uv pip install --python "$VENV_PYTHON" --no-build-isolation -e .[ring-flash-attn,optimizers,ray] $AXOLOTL_ARGS; \
|
||||||
fi && \
|
fi && \
|
||||||
python scripts/unsloth_install.py | sh && \
|
uv pip install --python "$VENV_PYTHON" --no-build-isolation flash-attn $AXOLOTL_ARGS && \
|
||||||
python scripts/cutcrossentropy_install.py | sh && \
|
"$VENV_PYTHON" scripts/unsloth_install.py | sh && \
|
||||||
pip install pytest && \
|
"$VENV_PYTHON" scripts/cutcrossentropy_install.py | sh && \
|
||||||
pip cache purge
|
uv pip install --python "$VENV_PYTHON" pytest
|
||||||
|
|
||||||
# fix so that git fetch/pull from remote works with shallow clone
|
# fix so that git fetch/pull from remote works with shallow clone
|
||||||
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
||||||
|
|||||||
@@ -48,5 +48,5 @@ RUN git lfs install --skip-repo && \
|
|||||||
pip3 cache purge
|
pip3 cache purge
|
||||||
|
|
||||||
RUN if [ "$PYTORCH_VERSION" = "2.6.0" ] && [ "$CUDA" = "124" ] ; then \
|
RUN if [ "$PYTORCH_VERSION" = "2.6.0" ] && [ "$CUDA" = "124" ] ; then \
|
||||||
FLASH_ATTENTION_FORCE_BUILD="TRUE" pip3 install --no-build-isolation flash-attn==2.8.0.post2; \
|
FLASH_ATTENTION_FORCE_BUILD="TRUE" uv pip install --no-build-isolation flash-attn==2.8.0.post2; \
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ EXPOSE 22
|
|||||||
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
||||||
COPY scripts/motd /etc/motd
|
COPY scripts/motd /etc/motd
|
||||||
|
|
||||||
RUN pip install jupyterlab notebook ipywidgets && \
|
RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
|
||||||
jupyter lab clean
|
"$VENV_PYTHON" -m jupyter lab clean
|
||||||
RUN apt update && \
|
RUN apt update && \
|
||||||
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
|
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
|
||||||
rm -rf /var/cache/apt/archives && \
|
rm -rf /var/cache/apt/archives && \
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ EXPOSE 22
|
|||||||
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
||||||
COPY scripts/motd /etc/motd
|
COPY scripts/motd /etc/motd
|
||||||
|
|
||||||
RUN pip install jupyterlab notebook ipywidgets && \
|
RUN uv pip install --python "$VENV_PYTHON" jupyterlab notebook ipywidgets && \
|
||||||
jupyter lab clean
|
"$VENV_PYTHON" -m jupyter lab clean
|
||||||
RUN apt update && \
|
RUN apt update && \
|
||||||
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm && \
|
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop ibverbs-providers ibverbs-utils infiniband-diags librdmacm-dev librdmacm1 rdmacm-utils slurm-wlm && \
|
||||||
rm -rf /var/cache/apt/archives && \
|
rm -rf /var/cache/apt/archives && \
|
||||||
|
|||||||
@@ -24,13 +24,14 @@ RUN git fetch origin +$GITHUB_REF && \
|
|||||||
|
|
||||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
||||||
else \
|
else \
|
||||||
pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
|
uv pip install --no-build-isolation -e .[deepspeed,mamba-ssm] $AXOLOTL_ARGS; \
|
||||||
fi
|
fi && \
|
||||||
|
uv pip install --no-build-isolation flash-attn $AXOLOTL_ARGS
|
||||||
|
|
||||||
# So we can test the Docker image
|
# So we can test the Docker image
|
||||||
RUN pip install pytest
|
RUN uv pip install pytest
|
||||||
|
|
||||||
# fix so that git fetch/pull from remote works
|
# fix so that git fetch/pull from remote works
|
||||||
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
|
|||||||
ENV PYTHON_VERSION=$PYTHON_VERSION
|
ENV PYTHON_VERSION=$PYTHON_VERSION
|
||||||
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
|
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
|
||||||
ENV UV_TORCH_BACKEND="cu${CUDA}"
|
ENV UV_TORCH_BACKEND="cu${CUDA}"
|
||||||
|
ENV VENV_PYTHON=/workspace/axolotl-venv/bin/python
|
||||||
|
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \
|
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \
|
||||||
@@ -29,8 +30,8 @@ RUN uv venv --no-project --relocatable axolotl-venv
|
|||||||
|
|
||||||
ENV PATH="/workspace/axolotl-venv/bin:${PATH}"
|
ENV PATH="/workspace/axolotl-venv/bin:${PATH}"
|
||||||
|
|
||||||
RUN uv pip install packaging setuptools wheel psutil \
|
RUN uv pip install --python "$VENV_PYTHON" packaging setuptools wheel psutil protobuf grpclib \
|
||||||
&& uv pip install torch==${PYTORCH_VERSION} \
|
&& uv pip install --python "$VENV_PYTHON" torch==${PYTORCH_VERSION} \
|
||||||
&& uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
|
&& uv pip install --python "$VENV_PYTHON" --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \
|
||||||
&& uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
|
&& uv pip install --python "$VENV_PYTHON" "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \
|
||||||
&& uv pip install awscli pydantic
|
&& uv pip install --python "$VENV_PYTHON" awscli pydantic
|
||||||
|
|||||||
@@ -72,8 +72,8 @@ datasets:
|
|||||||
Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project:
|
Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install packaging
|
uv sync --extra deepspeed
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Remote Hosts
|
#### Remote Hosts
|
||||||
@@ -213,8 +213,8 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
|||||||
You will now be in the container. Next, perform an editable install of Axolotl:
|
You will now be in the container. Next, perform an editable install of Axolotl:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install packaging
|
uv sync --extra deepspeed
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
### Attach To Container
|
### Attach To Container
|
||||||
|
|||||||
@@ -29,19 +29,40 @@ Follow the instructions at: [https://pytorch.org/get-started/locally/](https://p
|
|||||||
For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
|
For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
### PyPI Installation (Recommended) {#sec-pypi}
|
### uv Installation (Recommended) {#sec-uv-quick}
|
||||||
|
|
||||||
```{.bash}
|
```{.bash}
|
||||||
pip3 install -U packaging setuptools wheel ninja
|
# Install uv if not already installed
|
||||||
pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
|
||||||
|
# Add Axolotl to a project (recommended)
|
||||||
|
uv init my-project && cd my-project
|
||||||
|
uv add axolotl
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
source .venv/bin/activate
|
||||||
|
```
|
||||||
|
|
||||||
|
For a quick one-off install without creating a project:
|
||||||
|
|
||||||
|
```{.bash}
|
||||||
|
uv pip install axolotl
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
```
|
||||||
|
|
||||||
|
### pip Installation {#sec-pypi}
|
||||||
|
|
||||||
|
```{.bash}
|
||||||
|
pip install --no-build-isolation axolotl[deepspeed]
|
||||||
|
pip install --no-build-isolation flash-attn
|
||||||
```
|
```
|
||||||
|
|
||||||
We use `--no-build-isolation` in order to detect the installed PyTorch version (if
|
We use `--no-build-isolation` in order to detect the installed PyTorch version (if
|
||||||
installed) in order not to clobber it, and so that we set the correct version of
|
installed) in order not to clobber it, and so that we set the correct version of
|
||||||
dependencies that are specific to the PyTorch version or other installed
|
dependencies that are specific to the PyTorch version or other installed
|
||||||
co-dependencies.
|
co-dependencies. Flash Attention is resolved separately so it can be built against
|
||||||
|
the environment configured by the previous step.
|
||||||
|
|
||||||
### uv Installation {#sec-uv}
|
### Advanced uv Installation {#sec-uv}
|
||||||
|
|
||||||
uv is a fast, reliable Python package installer and resolver built in Rust. It offers significant performance improvements over pip and provides better dependency resolution, making it an excellent choice for complex environments.
|
uv is a fast, reliable Python package installer and resolver built in Rust. It offers significant performance improvements over pip and provides better dependency resolution, making it an excellent choice for complex environments.
|
||||||
|
|
||||||
@@ -62,28 +83,38 @@ source .venv/bin/activate
|
|||||||
Install PyTorch
|
Install PyTorch
|
||||||
- PyTorch 2.6.0 recommended
|
- PyTorch 2.6.0 recommended
|
||||||
```{.bash}
|
```{.bash}
|
||||||
uv pip install packaging setuptools wheel
|
|
||||||
uv pip install torch==2.6.0
|
uv pip install torch==2.6.0
|
||||||
uv pip install awscli pydantic
|
uv pip install awscli pydantic
|
||||||
```
|
```
|
||||||
|
|
||||||
Install axolotl from PyPi
|
Install axolotl from PyPI
|
||||||
```{.bash}
|
```{.bash}
|
||||||
uv pip install --no-build-isolation axolotl[deepspeed,flash-attn]
|
uv pip install --no-build-isolation 'axolotl[deepspeed]'
|
||||||
|
|
||||||
# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO
|
# optionally install with vLLM if you're using torch==2.6.0 and want to train w/ GRPO
|
||||||
uv pip install --no-build-isolation axolotl[deepspeed,flash-attn,vllm]
|
# uv pip install --no-build-isolation axolotl[deepspeed,vllm]
|
||||||
|
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
### Edge/Development Build {#sec-edge-build}
|
### Edge/Development Build {#sec-edge-build}
|
||||||
|
|
||||||
For the latest features between releases:
|
For the latest features between releases:
|
||||||
|
|
||||||
|
#### Using uv (recommended)
|
||||||
```{.bash}
|
```{.bash}
|
||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
pip3 install -U packaging setuptools wheel ninja
|
curl -LsSf https://astral.sh/uv/install.sh | sh # If not already installed
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
uv sync
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Using pip
|
||||||
|
```{.bash}
|
||||||
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
|
cd axolotl
|
||||||
|
pip install --no-build-isolation -e '.[deepspeed]'
|
||||||
|
pip install --no-build-isolation flash-attn
|
||||||
```
|
```
|
||||||
|
|
||||||
### Docker {#sec-docker}
|
### Docker {#sec-docker}
|
||||||
@@ -141,7 +172,7 @@ For providers supporting Docker:
|
|||||||
### macOS {#sec-macos}
|
### macOS {#sec-macos}
|
||||||
|
|
||||||
```{.bash}
|
```{.bash}
|
||||||
pip3 install --no-build-isolation -e '.'
|
uv pip install --no-build-isolation -e '.'
|
||||||
```
|
```
|
||||||
|
|
||||||
See @sec-troubleshooting for Mac-specific issues.
|
See @sec-troubleshooting for Mac-specific issues.
|
||||||
@@ -159,10 +190,15 @@ We recommend using WSL2 (Windows Subsystem for Linux) or Docker.
|
|||||||
1. Install Python ≥3.11
|
1. Install Python ≥3.11
|
||||||
2. Install PyTorch: https://pytorch.org/get-started/locally/
|
2. Install PyTorch: https://pytorch.org/get-started/locally/
|
||||||
3. Install Axolotl:
|
3. Install Axolotl:
|
||||||
```{.bash}
|
```{.bash}
|
||||||
pip3 install -U packaging setuptools wheel ninja
|
# Option A: add Axolotl to the environment
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
|
uv add axolotl
|
||||||
```
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install axolotl
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
```
|
||||||
4. (Optional) Login to Hugging Face:
|
4. (Optional) Login to Hugging Face:
|
||||||
```{.bash}
|
```{.bash}
|
||||||
huggingface-cli login
|
huggingface-cli login
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ chat_template: llava
|
|||||||
### Mistral-Small-3.1 {#sec-mistral-small-31}
|
### Mistral-Small-3.1 {#sec-mistral-small-31}
|
||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
|
Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -105,7 +105,7 @@ base_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503
|
|||||||
### Magistral-Small-2509 {#sec-magistral-small-2509}
|
### Magistral-Small-2509 {#sec-magistral-small-2509}
|
||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
Please make sure to install vision lib via `pip install 'mistral-common[opencv]==1.8.5'`
|
Please make sure to install vision lib via `uv pip install 'mistral-common[opencv]==1.8.5'`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -115,7 +115,7 @@ base_model: mistralai/Magistral-Small-2509
|
|||||||
### Voxtral {#sec-voxtral}
|
### Voxtral {#sec-voxtral}
|
||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
Please make sure to install audio lib via `pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
|
Please make sure to install audio lib via `uv pip install librosa==0.11.0 'mistral_common[audio]==1.8.3'`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -143,7 +143,7 @@ The model's initial loss and grad norm will be very high. We suspect this to be
|
|||||||
:::
|
:::
|
||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
Please make sure to install `timm` via `pip3 install timm==1.0.17`
|
Please make sure to install `timm` via `uv pip install timm==1.0.17`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -171,7 +171,7 @@ chat_template: qwen2_vl # same as qwen2-vl
|
|||||||
### SmolVLM2 {#sec-smolvlm2}
|
### SmolVLM2 {#sec-smolvlm2}
|
||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
Please make sure to install `num2words` via `pip3 install num2words==0.5.14`
|
Please make sure to install `num2words` via `uv pip install num2words==0.5.14`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -181,7 +181,7 @@ base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
|
|||||||
### LFM2-VL {#sec-lfm2-vl}
|
### LFM2-VL {#sec-lfm2-vl}
|
||||||
|
|
||||||
::: {.callout-warning}
|
::: {.callout-warning}
|
||||||
Please uninstall `causal-conv1d` via `pip3 uninstall -y causal-conv1d`
|
Please uninstall `causal-conv1d` via `uv pip uninstall causal-conv1d`
|
||||||
:::
|
:::
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -222,7 +222,7 @@ For audio loading, you can use the following keys within `content` alongside `"t
|
|||||||
|
|
||||||
::: {.callout-tip}
|
::: {.callout-tip}
|
||||||
|
|
||||||
You may need to install `librosa` via `pip3 install librosa==0.11.0`.
|
You may need to install `librosa` via `uv pip install librosa==0.11.0`.
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
|||||||
@@ -49,9 +49,9 @@ When sequence parallelism is enabled:
|
|||||||
To use sequence parallelism, you need:
|
To use sequence parallelism, you need:
|
||||||
|
|
||||||
- Multiple GPUs (at least 2)
|
- Multiple GPUs (at least 2)
|
||||||
- The `ring-flash-attn` package. Install with:
|
- The `ring-flash-attn` package. Install with either `uv sync --extra ring-flash-attn`
|
||||||
- `pip install axolotl[ring-flash-attn]` (preferred)
|
(from a cloned repository) or `uv pip install 'ring-flash-attn>=0.1.4'`.
|
||||||
- `pip install ring-flash-attn>=0.1.4`
|
- Flash Attention installed separately with `uv pip install flash-attn --no-build-isolation`.
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,14 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
|||||||
|
|
||||||
Here is an example of how to install from pip:
|
Here is an example of how to install with uv:
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have a compatible version of Pytorch installed
|
# Ensure you have a compatible version of PyTorch installed
|
||||||
pip3 install packaging setuptools wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Run one of the finetuning examples below.
|
2. Run one of the finetuning examples below.
|
||||||
@@ -35,7 +40,7 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
|
|||||||
|
|
||||||
- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
|
- **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
|
||||||
```bash
|
```bash
|
||||||
pip uninstall -y causal-conv1d
|
uv pip uninstall causal-conv1d
|
||||||
```
|
```
|
||||||
|
|
||||||
- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
|
- **Dataset Loading**: Read more on how to load your own dataset in our [documentation](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
uv sync
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
@@ -31,7 +31,7 @@ python scripts/cutcrossentropy_install.py | sh
|
|||||||
# For those using our Docker image, use the below path.
|
# For those using our Docker image, use the below path.
|
||||||
export CUDA_HOME=/usr/local/cuda
|
export CUDA_HOME=/usr/local/cuda
|
||||||
|
|
||||||
pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
|
For any installation errors, see [XIELU Installation Issues](#xielu-installation-issues)
|
||||||
@@ -67,7 +67,7 @@ If those didn't help, please try the below solutions:
|
|||||||
1. Pass env for CMAKE and try install again:
|
1. Pass env for CMAKE and try install again:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
Python_EXECUTABLE=$(which python) pip3 install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
Python_EXECUTABLE=$(which python) uv pip install git+https://github.com/nickjbrowning/XIELU@59d6031 --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Git clone the repo and manually hardcode python path:
|
2. Git clone the repo and manually hardcode python path:
|
||||||
@@ -92,7 +92,7 @@ If those didn't help, please try the below solutions:
|
|||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install . --no-build-isolation --no-deps
|
uv pip install . --no-build-isolation --no-deps
|
||||||
```
|
```
|
||||||
|
|
||||||
## Optimization Guides
|
## Optimization Guides
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
uv sync
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -12,10 +12,10 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
|
"Axolotl is the most performant LLM post-training framework available, delivering faster training with efficient, consistent and stable performance. Train your workload and ship your product 30% faster; saving you both time and money.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- ⭐ us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
"- \u2b50 us on [GitHub](https://github.com/axolotl-ai-cloud/axolotl)\n",
|
||||||
"- 📜 Read the [Docs](http://docs.axolotl.ai/)\n",
|
"- \ud83d\udcdc Read the [Docs](http://docs.axolotl.ai/)\n",
|
||||||
"- 💬 Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
"- \ud83d\udcac Chat with us on [Discord](https://discord.gg/mnpEYgRUmD)\n",
|
||||||
"- 📰 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
"- \ud83d\udcf0 Get updates on [X/Twitter](https://x.com/axolotl_ai)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -39,8 +39,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"%%capture\n",
|
"%%capture\n",
|
||||||
"# This step can take ~5-10 minutes to install dependencies\n",
|
"# This step can take ~5-10 minutes to install dependencies\n",
|
||||||
"!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
|
"!uv pip install --no-build-isolation \"axolotl>=0.9.1\"\n!uv pip install flash-attn --no-build-isolation\n",
|
||||||
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
"!uv pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1371,7 +1371,7 @@
|
|||||||
"version_minor": 0
|
"version_minor": 0
|
||||||
},
|
},
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv\u2026"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -1729,9 +1729,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
|
"layout": "IPY_MODEL_12815f401eba44658caa7b2e490137a8",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
|
"style": "IPY_MODEL_30e02aa2d0d241979369e598287f2639",
|
||||||
"value": "Drop Samples with Zero Trainable Tokens (num_proc=2): 100%"
|
"value": "Drop\u2007Samples\u2007with\u2007Zero\u2007Trainable\u2007Tokens\u2007(num_proc=2):\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"083f9cda8d754c168beee10d2f8955a2": {
|
"083f9cda8d754c168beee10d2f8955a2": {
|
||||||
@@ -1774,9 +1774,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
|
"layout": "IPY_MODEL_b195f160ca20442fadd8b5aed0ee41af",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
|
"style": "IPY_MODEL_ca65e32eb52f48c09a84b33cb18f22cd",
|
||||||
"value": " 11.4M/11.4M [00:00<00:00, 21.8MB/s]"
|
"value": "\u200711.4M/11.4M\u2007[00:00<00:00,\u200721.8MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"0a46ad75c198463d843fb35e813642cb": {
|
"0a46ad75c198463d843fb35e813642cb": {
|
||||||
@@ -1917,7 +1917,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
|
"layout": "IPY_MODEL_b1bea589efa14258a9982071b87938bf",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
|
"style": "IPY_MODEL_590eef89881545aa8bbef9a8bbe7fb00",
|
||||||
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
"value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
|
||||||
}
|
}
|
||||||
@@ -1938,9 +1938,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
|
"layout": "IPY_MODEL_bfcdbba993b74972a9e3e575f86908ff",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
|
"style": "IPY_MODEL_6ebb2ec171414e47a14765505f64bb3c",
|
||||||
"value": " 3.84G/3.84G [00:09<00:00, 664MB/s]"
|
"value": "\u20073.84G/3.84G\u2007[00:09<00:00,\u2007664MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"0e936d9dbf9c4fdd86bbfe9730dedc47": {
|
"0e936d9dbf9c4fdd86bbfe9730dedc47": {
|
||||||
@@ -2296,9 +2296,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
|
"layout": "IPY_MODEL_349eee9f56d64f0cba6fc24ff2c50c9b",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
|
"style": "IPY_MODEL_7e5d3774060e4589aa65982da5ea4ef4",
|
||||||
"value": " 9985/9985 [00:04<00:00, 2604.11 examples/s]"
|
"value": "\u20079985/9985\u2007[00:04<00:00,\u20072604.11\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"16d1283741404b7bb319094c992fce01": {
|
"16d1283741404b7bb319094c992fce01": {
|
||||||
@@ -2317,9 +2317,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
|
"layout": "IPY_MODEL_a4e5789584564049b83df7c6c54a3e08",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
|
"style": "IPY_MODEL_ff3a94b146a948b6907f5d80c7157f99",
|
||||||
"value": " 9985/0 [00:00<00:00, 50763.46 examples/s]"
|
"value": "\u20079985/0\u2007[00:00<00:00,\u200750763.46\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"1811cda0644e4190a9469d1774435d82": {
|
"1811cda0644e4190a9469d1774435d82": {
|
||||||
@@ -2390,9 +2390,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
|
"layout": "IPY_MODEL_e366ae3fceec4566b9ed303d6c5f90af",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
|
"style": "IPY_MODEL_5dd7d150dbe04f08b165ce7f2c27cd11",
|
||||||
"value": "model-00008-of-00008.safetensors: 100%"
|
"value": "model-00008-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"19127c7bb1554ccbac877059f9a82db0": {
|
"19127c7bb1554ccbac877059f9a82db0": {
|
||||||
@@ -2561,9 +2561,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
|
"layout": "IPY_MODEL_0dea5caa27384f5689e3cab51f558727",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
|
"style": "IPY_MODEL_a6f48410b9964fefba0c3009a77dc838",
|
||||||
"value": " 9.68k/9.68k [00:00<00:00, 812kB/s]"
|
"value": "\u20079.68k/9.68k\u2007[00:00<00:00,\u2007812kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"1f7d30f71bbd4547a9150d21da071055": {
|
"1f7d30f71bbd4547a9150d21da071055": {
|
||||||
@@ -2634,9 +2634,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
|
"layout": "IPY_MODEL_f4a1795dc7514a718f478245f521f0ba",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
|
"style": "IPY_MODEL_5e746eb25bbe416fb585fa24e79f5177",
|
||||||
"value": "model-00002-of-00008.safetensors: 100%"
|
"value": "model-00002-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"20352e5f58d24bb8b1f3940efd14fe4a": {
|
"20352e5f58d24bb8b1f3940efd14fe4a": {
|
||||||
@@ -2707,9 +2707,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
|
"layout": "IPY_MODEL_1c6f1f10667545aaab958016ba7e2c94",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_e6e969610738449887259063967f82b0",
|
"style": "IPY_MODEL_e6e969610738449887259063967f82b0",
|
||||||
"value": " 2.78M/2.78M [00:00<00:00, 17.8MB/s]"
|
"value": "\u20072.78M/2.78M\u2007[00:00<00:00,\u200717.8MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"258b7c635c1045329d4669e48c46ccd5": {
|
"258b7c635c1045329d4669e48c46ccd5": {
|
||||||
@@ -3056,9 +3056,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
|
"layout": "IPY_MODEL_be724f04b03942b2a033a7e8898bb4fd",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
|
"style": "IPY_MODEL_fcbab4d8dced41a18dfccce81e3a45a0",
|
||||||
"value": "model-00005-of-00008.safetensors: 100%"
|
"value": "model-00005-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"3036608c71904ce9ae4bb2a9fa8802d9": {
|
"3036608c71904ce9ae4bb2a9fa8802d9": {
|
||||||
@@ -3077,9 +3077,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
|
"layout": "IPY_MODEL_5ca6be24acb548cea130bd58e9954c7c",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
|
"style": "IPY_MODEL_5cfb02ee044b4011a378efa8b54a370f",
|
||||||
"value": " 3.96G/3.96G [00:10<00:00, 531MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:10<00:00,\u2007531MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"30a81da86f8043eca301e86a8651201a": {
|
"30a81da86f8043eca301e86a8651201a": {
|
||||||
@@ -3629,9 +3629,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
|
"layout": "IPY_MODEL_8f5bd719974e41c3a8dd9a5b0d3d71e6",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
|
"style": "IPY_MODEL_b87c84de30e84b3abf4871461fb9cbd3",
|
||||||
"value": "Loading checkpoint shards: 100%"
|
"value": "Loading\u2007checkpoint\u2007shards:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"41f3b32c2f6b4034ae7a3b9124e28bc7": {
|
"41f3b32c2f6b4034ae7a3b9124e28bc7": {
|
||||||
@@ -3791,7 +3791,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
|
"layout": "IPY_MODEL_39789237703c4a418134243055c9cbf5",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
|
"style": "IPY_MODEL_a3a945817f684328b34651fe052393ec",
|
||||||
"value": "Connecting..."
|
"value": "Connecting..."
|
||||||
}
|
}
|
||||||
@@ -4077,9 +4077,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
|
"layout": "IPY_MODEL_4d468f96ec924681ad65eb671674b93e",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
|
"style": "IPY_MODEL_ad7599de524549c48bf2d3124ad4b299",
|
||||||
"value": "Dropping Long Sequences (num_proc=2): 100%"
|
"value": "Dropping\u2007Long\u2007Sequences\u2007(num_proc=2):\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"5ca240f31e6b44e3882c5eb37cd5a309": {
|
"5ca240f31e6b44e3882c5eb37cd5a309": {
|
||||||
@@ -4471,9 +4471,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
|
"layout": "IPY_MODEL_5e18768f7ad6434ba8b8b8a2e853e204",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
|
"style": "IPY_MODEL_bb33aec33a6447078c31bfd728942994",
|
||||||
"value": " 728/728 [00:00<00:00, 20.3kB/s]"
|
"value": "\u2007728/728\u2007[00:00<00:00,\u200720.3kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"62e302ebdad64aada0ffe64ae1c873f3": {
|
"62e302ebdad64aada0ffe64ae1c873f3": {
|
||||||
@@ -4636,9 +4636,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
|
"layout": "IPY_MODEL_81c3db71ac704280ad030072655f1537",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
|
"style": "IPY_MODEL_042e091f75694c47aee761e760e76773",
|
||||||
"value": " 9985/9985 [00:02<00:00, 3977.47 examples/s]"
|
"value": "\u20079985/9985\u2007[00:02<00:00,\u20073977.47\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"67da6c4260574869aa24c3cbc1bc1654": {
|
"67da6c4260574869aa24c3cbc1bc1654": {
|
||||||
@@ -4778,7 +4778,7 @@
|
|||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"disabled": false,
|
"disabled": false,
|
||||||
"layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
|
"layout": "IPY_MODEL_2e257c8be2da40b4bb67a9e4ab6811f3",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
|
"style": "IPY_MODEL_56e3768bef5a4b9db4168c5c17f509c2",
|
||||||
"value": ""
|
"value": ""
|
||||||
}
|
}
|
||||||
@@ -4823,9 +4823,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
|
"layout": "IPY_MODEL_41f3b32c2f6b4034ae7a3b9124e28bc7",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
|
"style": "IPY_MODEL_a10d0a76010f4e508c65a9b69ebc5156",
|
||||||
"value": "Tokenizing Prompts (num_proc=2): 100%"
|
"value": "Tokenizing\u2007Prompts\u2007(num_proc=2):\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"704f2f5a9b1c49d5a75a0025a5dda11b": {
|
"704f2f5a9b1c49d5a75a0025a5dda11b": {
|
||||||
@@ -5071,9 +5071,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
|
"layout": "IPY_MODEL_93a44a11aa4846fa8efc6c1413ef1627",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
|
"style": "IPY_MODEL_a55060adc3564407ac81ad7297d34aaa",
|
||||||
"value": "train.jsonl: 100%"
|
"value": "train.jsonl:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"7be6f04c284e4326bb4ff3d301e7b3c6": {
|
"7be6f04c284e4326bb4ff3d301e7b3c6": {
|
||||||
@@ -5138,9 +5138,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
|
"layout": "IPY_MODEL_7fd44cf9ca6e4726bfd7ac21846d6a14",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
|
"style": "IPY_MODEL_366a343b62fa47d8985a3bd464d99f9e",
|
||||||
"value": "config.json: 100%"
|
"value": "config.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"7cd0b85ebd204b7aba908417811ce4e0": {
|
"7cd0b85ebd204b7aba908417811ce4e0": {
|
||||||
@@ -5339,9 +5339,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
|
"layout": "IPY_MODEL_67da6c4260574869aa24c3cbc1bc1654",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
|
"style": "IPY_MODEL_94b9088614464f60a203de39dbcae853",
|
||||||
"value": " 8/8 [01:47<00:00, 11.64s/it]"
|
"value": "\u20078/8\u2007[01:47<00:00,\u200711.64s/it]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"823f1c78f15043e38bbd4dca3932a86a": {
|
"823f1c78f15043e38bbd4dca3932a86a": {
|
||||||
@@ -5488,7 +5488,7 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
|
"layout": "IPY_MODEL_8640ac440fbc4644b9a3af7ba3ae7183",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
|
"style": "IPY_MODEL_5cea7996f02040b187ece0bb2d6a8d1f",
|
||||||
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
"value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
|
||||||
}
|
}
|
||||||
@@ -5509,9 +5509,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
|
"layout": "IPY_MODEL_ef223e8504b64e3592589880326aaf41",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
|
"style": "IPY_MODEL_598da69727bd4fb8b1caf465ac736d7a",
|
||||||
"value": " 1.67M/1.67M [00:00<00:00, 19.0MB/s]"
|
"value": "\u20071.67M/1.67M\u2007[00:00<00:00,\u200719.0MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"897b77a56c09479bb11d7f2a30997e55": {
|
"897b77a56c09479bb11d7f2a30997e55": {
|
||||||
@@ -5717,9 +5717,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
|
"layout": "IPY_MODEL_37de928300e34184881039378bd75e7f",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
|
"style": "IPY_MODEL_0e936d9dbf9c4fdd86bbfe9730dedc47",
|
||||||
"value": " 3.96G/3.96G [00:13<00:00, 273MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007273MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"936d04b5fe1b4c63bf0b080e423d051b": {
|
"936d04b5fe1b4c63bf0b080e423d051b": {
|
||||||
@@ -6050,9 +6050,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
|
"layout": "IPY_MODEL_d955dcaa0e944e719f3a06139dd54a03",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
|
"style": "IPY_MODEL_d3de2662c7964f1ba96e58da382af720",
|
||||||
"value": "merges.txt: 100%"
|
"value": "merges.txt:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"9cd5211b5d8b457aa0002f1d17b80028": {
|
"9cd5211b5d8b457aa0002f1d17b80028": {
|
||||||
@@ -6071,9 +6071,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
|
"layout": "IPY_MODEL_6932489232ec4ab18a160b1e7fbcdfe1",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
|
"style": "IPY_MODEL_4540927d98f54466b434ba4c0edf045d",
|
||||||
"value": "model-00007-of-00008.safetensors: 100%"
|
"value": "model-00007-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"9d4897eefb5f48259ffb2d23e332f752": {
|
"9d4897eefb5f48259ffb2d23e332f752": {
|
||||||
@@ -6303,9 +6303,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
|
"layout": "IPY_MODEL_3aaecbf540f54a2db9ab0931e3b1fe57",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
|
"style": "IPY_MODEL_9e333ed3b5014069ac1dd969255dd591",
|
||||||
"value": " 239/239 [00:00<00:00, 30.9kB/s]"
|
"value": "\u2007239/239\u2007[00:00<00:00,\u200730.9kB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a20927bf5f2c41f58c1e31ac858ab36c": {
|
"a20927bf5f2c41f58c1e31ac858ab36c": {
|
||||||
@@ -6324,9 +6324,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
|
"layout": "IPY_MODEL_1811cda0644e4190a9469d1774435d82",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
|
"style": "IPY_MODEL_35c811d2ae8e43f3b5cecbdd3cfa857f",
|
||||||
"value": "tokenizer.json: 100%"
|
"value": "tokenizer.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a3a945817f684328b34651fe052393ec": {
|
"a3a945817f684328b34651fe052393ec": {
|
||||||
@@ -6360,9 +6360,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
|
"layout": "IPY_MODEL_ed5ca967ad5342929e578ac6aa4dc4c0",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
|
"style": "IPY_MODEL_af401d117d5047629d3a6e2361757b62",
|
||||||
"value": "model-00001-of-00008.safetensors: 100%"
|
"value": "model-00001-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"a4e5789584564049b83df7c6c54a3e08": {
|
"a4e5789584564049b83df7c6c54a3e08": {
|
||||||
@@ -6494,9 +6494,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
|
"layout": "IPY_MODEL_fa1282ccc7544e4f818e2f03ccffe4a5",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
|
"style": "IPY_MODEL_bbbf575d2a4b4c6ea8389be79b2a6039",
|
||||||
"value": "model.safetensors.index.json: 100%"
|
"value": "model.safetensors.index.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ab93eabd7cea4b94b4b7a387f101e8a1": {
|
"ab93eabd7cea4b94b4b7a387f101e8a1": {
|
||||||
@@ -6582,9 +6582,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
|
"layout": "IPY_MODEL_62e302ebdad64aada0ffe64ae1c873f3",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
|
"style": "IPY_MODEL_bd1b0dfed6d34d16af33a4a58330f5ec",
|
||||||
"value": "Saving the dataset (1/1 shards): 100%"
|
"value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ad7599de524549c48bf2d3124ad4b299": {
|
"ad7599de524549c48bf2d3124ad4b299": {
|
||||||
@@ -6967,9 +6967,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
|
"layout": "IPY_MODEL_2b3a2659b12244bd8548320320016dbf",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
|
"style": "IPY_MODEL_0cd7efffbb3c4c4b972e63749f61ab97",
|
||||||
"value": "Generating train split: "
|
"value": "Generating\u2007train\u2007split:\u2007"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"b87c84de30e84b3abf4871461fb9cbd3": {
|
"b87c84de30e84b3abf4871461fb9cbd3": {
|
||||||
@@ -7085,9 +7085,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
|
"layout": "IPY_MODEL_0f480e3a0b0a45d2a2d2dec3cad923f3",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
|
"style": "IPY_MODEL_fcb30372e7404c5d8a1ad4df91e6c7b2",
|
||||||
"value": " 1.91G/1.91G [00:05<00:00, 444MB/s]"
|
"value": "\u20071.91G/1.91G\u2007[00:05<00:00,\u2007444MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"bd1b0dfed6d34d16af33a4a58330f5ec": {
|
"bd1b0dfed6d34d16af33a4a58330f5ec": {
|
||||||
@@ -7325,9 +7325,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
|
"layout": "IPY_MODEL_158c8b85dbf34de6a94b4e35e2fc7d5a",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
|
"style": "IPY_MODEL_0b4c9753a7cb4354b8e5f187e6e1ad7c",
|
||||||
"value": " 3.96G/3.96G [00:15<00:00, 564MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:15<00:00,\u2007564MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c0991cf63ee6458b96e9a75e7a88b61a": {
|
"c0991cf63ee6458b96e9a75e7a88b61a": {
|
||||||
@@ -7346,9 +7346,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
|
"layout": "IPY_MODEL_ed28e2e0410d4e0b855467e798e53d66",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
|
"style": "IPY_MODEL_d93f134f802b4b69b575bdaf07dbd27c",
|
||||||
"value": "tokenizer_config.json: 100%"
|
"value": "tokenizer_config.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c12ea43372ac4d57bb9605f1a429b397": {
|
"c12ea43372ac4d57bb9605f1a429b397": {
|
||||||
@@ -7581,9 +7581,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
|
"layout": "IPY_MODEL_8bc9d8ba866c442b9118d9630009939c",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
|
"style": "IPY_MODEL_9f56a2d9979c4bd8928c644c22c3ecdf",
|
||||||
"value": "model-00003-of-00008.safetensors: 100%"
|
"value": "model-00003-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c6164e05a1914ae48083db9ad7f4ef7c": {
|
"c6164e05a1914ae48083db9ad7f4ef7c": {
|
||||||
@@ -7694,9 +7694,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
|
"layout": "IPY_MODEL_e40d1c1ac9494b3bade9858324e7ffdf",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
|
"style": "IPY_MODEL_d65b6b060d9845779299491ac5599c31",
|
||||||
"value": " 9985/9985 [01:04<00:00, 189.08 examples/s]"
|
"value": "\u20079985/9985\u2007[01:04<00:00,\u2007189.08\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"c7433acd3c4841e6958ae8f7e87b1808": {
|
"c7433acd3c4841e6958ae8f7e87b1808": {
|
||||||
@@ -7737,9 +7737,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
|
"layout": "IPY_MODEL_0077aedc3d174560bce924ee89e9c006",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
|
"style": "IPY_MODEL_00321cce58884f6f9b3855a21fcd9187",
|
||||||
"value": "Add position_id column (Sample Packing) (num_proc=2): 100%"
|
"value": "Add\u2007position_id\u2007column\u2007(Sample\u2007Packing)\u2007(num_proc=2):\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ca65e32eb52f48c09a84b33cb18f22cd": {
|
"ca65e32eb52f48c09a84b33cb18f22cd": {
|
||||||
@@ -8162,9 +8162,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
|
"layout": "IPY_MODEL_63580b6fb30642479fe3000915bf551a",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
|
"style": "IPY_MODEL_8f726dbfb45d4528afa33e36a6313267",
|
||||||
"value": " 27.3M/27.3M [00:00<00:00, 31.0MB/s]"
|
"value": "\u200727.3M/27.3M\u2007[00:00<00:00,\u200731.0MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"d43c6df07ddb466587807d6dbe1ff614": {
|
"d43c6df07ddb466587807d6dbe1ff614": {
|
||||||
@@ -8183,9 +8183,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
|
"layout": "IPY_MODEL_8c4d4fc5a30f4e7cb3be53fe2adda33d",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
|
"style": "IPY_MODEL_e90658f4bcb642baa78426012f863152",
|
||||||
"value": "model-00004-of-00008.safetensors: 100%"
|
"value": "model-00004-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"d65b6b060d9845779299491ac5599c31": {
|
"d65b6b060d9845779299491ac5599c31": {
|
||||||
@@ -8474,9 +8474,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
|
"layout": "IPY_MODEL_34cf3df51fbc41cabfdbba153c007f0e",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
|
"style": "IPY_MODEL_ac764024cf1c4e08ba7749afd2cd20ac",
|
||||||
"value": "vocab.json: 100%"
|
"value": "vocab.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"dfd2a2649b8341ef913207526708aff1": {
|
"dfd2a2649b8341ef913207526708aff1": {
|
||||||
@@ -8669,9 +8669,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
|
"layout": "IPY_MODEL_c6164e05a1914ae48083db9ad7f4ef7c",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
|
"style": "IPY_MODEL_813621384dc748b0ad06775e22761c0b",
|
||||||
"value": " 9985/9985 [00:03<00:00, 3622.89 examples/s]"
|
"value": "\u20079985/9985\u2007[00:03<00:00,\u20073622.89\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"e400cbf14bcc446a9d33b210cd93550b": {
|
"e400cbf14bcc446a9d33b210cd93550b": {
|
||||||
@@ -9065,9 +9065,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
|
"layout": "IPY_MODEL_fba7aa824b38467ab3061b226114cdec",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
|
"style": "IPY_MODEL_f3075dccbd2747b4a7913b66f44f2596",
|
||||||
"value": " 3.96G/3.96G [00:13<00:00, 398MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:13<00:00,\u2007398MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ec030fc3c346426f9abc3a89892258d3": {
|
"ec030fc3c346426f9abc3a89892258d3": {
|
||||||
@@ -9110,9 +9110,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
|
"layout": "IPY_MODEL_936d04b5fe1b4c63bf0b080e423d051b",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
|
"style": "IPY_MODEL_f1cef8e8dc2646fb9fd09f3b09081074",
|
||||||
"value": " 36.5k/36.5k [00:00<00:00, 4.32MB/s]"
|
"value": "\u200736.5k/36.5k\u2007[00:00<00:00,\u20074.32MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ed28e2e0410d4e0b855467e798e53d66": {
|
"ed28e2e0410d4e0b855467e798e53d66": {
|
||||||
@@ -9422,9 +9422,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
|
"layout": "IPY_MODEL_735d4f225b24414294fc1b213c61223c",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
|
"style": "IPY_MODEL_5e5e15b0569b474c9620083b3ec6af55",
|
||||||
"value": "generation_config.json: 100%"
|
"value": "generation_config.json:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"f4667818b9d34a09891cd727a429a610": {
|
"f4667818b9d34a09891cd727a429a610": {
|
||||||
@@ -9443,9 +9443,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
|
"layout": "IPY_MODEL_4b27c267393640f28f6eae0875bd2ed9",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
|
"style": "IPY_MODEL_9858cb74a09748a39e8149baac96702c",
|
||||||
"value": " 3.96G/3.96G [00:11<00:00, 457MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:11<00:00,\u2007457MB/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"f4a1795dc7514a718f478245f521f0ba": {
|
"f4a1795dc7514a718f478245f521f0ba": {
|
||||||
@@ -9830,9 +9830,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
|
"layout": "IPY_MODEL_d1f9b10c130542f094c8fd3d1e23b5e9",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
|
"style": "IPY_MODEL_e575d87a7efe4ec7b1efde489839d4a6",
|
||||||
"value": "model-00006-of-00008.safetensors: 100%"
|
"value": "model-00006-of-00008.safetensors:\u2007100%"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fe18bba7f3fb4c31bf840541f36b3425": {
|
"fe18bba7f3fb4c31bf840541f36b3425": {
|
||||||
@@ -9873,9 +9873,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
|
"layout": "IPY_MODEL_e5a82df528bb4e408797a3b6c2758f4a",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
|
"style": "IPY_MODEL_f113ebd8c1c34806bea4dd7ed3035173",
|
||||||
"value": " 9985/9985 [00:00<00:00, 44264.88 examples/s]"
|
"value": "\u20079985/9985\u2007[00:00<00:00,\u200744264.88\u2007examples/s]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"fea1b70fb46745feb5111b3929175b5d": {
|
"fea1b70fb46745feb5111b3929175b5d": {
|
||||||
@@ -9931,9 +9931,9 @@
|
|||||||
"description": "",
|
"description": "",
|
||||||
"description_tooltip": null,
|
"description_tooltip": null,
|
||||||
"layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
|
"layout": "IPY_MODEL_ab93eabd7cea4b94b4b7a387f101e8a1",
|
||||||
"placeholder": "",
|
"placeholder": "\u200b",
|
||||||
"style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
|
"style": "IPY_MODEL_704f2f5a9b1c49d5a75a0025a5dda11b",
|
||||||
"value": " 3.96G/3.96G [00:12<00:00, 656MB/s]"
|
"value": "\u20073.96G/3.96G\u2007[00:12<00:00,\u2007656MB/s]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,8 +16,13 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||||
|
|||||||
@@ -10,17 +10,22 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. In addition to Axolotl's requirements, Gemma-3n requires:
|
2. In addition to Axolotl's requirements, Gemma-3n requires:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install timm==1.0.17
|
uv pip install timm==1.0.17
|
||||||
|
|
||||||
# for loading audio data
|
# for loading audio data
|
||||||
pip3 install librosa==0.11.0
|
uv pip install librosa==0.11.0
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Download sample dataset files
|
3. Download sample dataset files
|
||||||
|
|||||||
@@ -12,8 +12,13 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
|
2. Choose one of the following configs below for training the 20B model. (for 120B, see [below](#training-120b))
|
||||||
@@ -75,7 +80,7 @@ for more information about using a special vllm-openai docker image for inferenc
|
|||||||
Optionally, vLLM can be installed from nightly:
|
Optionally, vLLM can be installed from nightly:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
uv pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly
|
||||||
```
|
```
|
||||||
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
|
and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment):
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -13,8 +13,8 @@ Tencent released a family of opensource models called HunYuan with varying param
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
uv sync
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -13,9 +13,14 @@ Thanks to the team at MistralAI for giving us early access to prepare for these
|
|||||||
Here is an example of how to install from pip:
|
Here is an example of how to install from PyPI using uv:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
2. Install [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy) to reduce training VRAM usage
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
uv sync
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
@@ -24,12 +24,12 @@ python scripts/cutcrossentropy_install.py | sh
|
|||||||
|
|
||||||
2. Install Qwen3-Next transformers commit
|
2. Install Qwen3-Next transformers commit
|
||||||
```bash
|
```bash
|
||||||
pip3 uninstall -y transformers && pip3 install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
uv pip uninstall transformers && uv pip install "git+https://github.com/huggingface/transformers.git@b9282355bea846b54ed850a066901496b19da654"
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Install FLA for improved performance
|
3. Install FLA for improved performance
|
||||||
```bash
|
```bash
|
||||||
pip3 uninstall -y causal-conv1d && pip3 install flash-linear-attention==0.3.2
|
uv pip uninstall causal-conv1d && uv pip install flash-linear-attention==0.3.2
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Run the finetuning example:
|
4. Run the finetuning example:
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
|||||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||||
cd axolotl
|
cd axolotl
|
||||||
|
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
uv sync --extra deepspeed
|
||||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
# Install Cut Cross Entropy
|
# Install Cut Cross Entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
@@ -13,14 +13,19 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
|
|||||||
Here is an example of how to install from pip:
|
Here is an example of how to install from PyPI using uv:
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have a compatible version of Pytorch installed
|
# Ensure you have a compatible version of PyTorch installed
|
||||||
pip3 install packaging setuptools wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Install an extra dependency:
|
2. Install an extra dependency:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip3 install num2words==0.5.14
|
uv pip install num2words==0.5.14
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Run the finetuning example:
|
3. Run the finetuning example:
|
||||||
|
|||||||
@@ -12,16 +12,21 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
|
||||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
# Option A: manage dependencies in your project
|
||||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
uv add 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
|
|
||||||
|
# Option B: quick install
|
||||||
|
uv pip install 'axolotl>=0.12.0'
|
||||||
|
uv pip install flash-attn --no-build-isolation
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Please install the below.
|
2. Please install the below.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# audio
|
# audio
|
||||||
pip3 install librosa==0.11.0
|
uv pip install librosa==0.11.0
|
||||||
pip3 install 'mistral_common[audio]==1.8.3'
|
uv pip install 'mistral_common[audio]==1.8.3'
|
||||||
|
|
||||||
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
# Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy
|
||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
|
|||||||
197
pyproject.toml
197
pyproject.toml
@@ -1,14 +1,131 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"]
|
requires = ["setuptools>=64", "wheel", "setuptools_scm>=8"]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "axolotl"
|
name = "axolotl"
|
||||||
dynamic = ["version", "dependencies", "optional-dependencies"]
|
dynamic = ["version"]
|
||||||
description = "LLM Trainer"
|
description = "LLM Trainer"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10,<3.13"
|
||||||
# license = "Apache-2.0"
|
license = {text = "Apache-2.0"}
|
||||||
|
authors = [
|
||||||
|
{name = "Axolotl AI"},
|
||||||
|
]
|
||||||
|
maintainers = [
|
||||||
|
{name = "Axolotl AI"},
|
||||||
|
]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 4 - Beta",
|
||||||
|
"License :: OSI Approved :: Apache Software License",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
]
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"torch>=2.6.0",
|
||||||
|
"packaging>=23.2",
|
||||||
|
"huggingface_hub>=0.33.0",
|
||||||
|
"peft==0.17.0",
|
||||||
|
"transformers==4.56.1",
|
||||||
|
"tokenizers>=0.21.1",
|
||||||
|
"accelerate==1.10.1",
|
||||||
|
"datasets==4.0.0",
|
||||||
|
"trl==0.23.0",
|
||||||
|
"hf_xet==1.1.5",
|
||||||
|
"kernels==0.9.0",
|
||||||
|
"trackio",
|
||||||
|
"optimum==1.16.2",
|
||||||
|
"hf_transfer",
|
||||||
|
"sentencepiece",
|
||||||
|
"gradio==5.41.1",
|
||||||
|
"modal==1.0.2",
|
||||||
|
"pydantic>=2.10.6",
|
||||||
|
"addict",
|
||||||
|
"fire",
|
||||||
|
"PyYAML>=6.0",
|
||||||
|
"requests",
|
||||||
|
"wandb",
|
||||||
|
"einops",
|
||||||
|
"colorama",
|
||||||
|
"numba",
|
||||||
|
"numpy>=1.24.4,<3.0",
|
||||||
|
"evaluate==0.4.1",
|
||||||
|
"scipy",
|
||||||
|
"scikit-learn>=1.7.0",
|
||||||
|
"nvidia-ml-py==12.560.30",
|
||||||
|
"art",
|
||||||
|
"tensorboard",
|
||||||
|
"python-dotenv==1.0.1",
|
||||||
|
"s3fs>=2024.5.0",
|
||||||
|
"gcsfs>=2024.5.0",
|
||||||
|
"adlfs>=2024.5.0",
|
||||||
|
"ocifs==1.3.2",
|
||||||
|
"zstandard>=0.23.0",
|
||||||
|
"fastcore",
|
||||||
|
"lm_eval==0.4.7",
|
||||||
|
"langdetect==1.0.9",
|
||||||
|
"immutabledict==4.2.0",
|
||||||
|
"antlr4-python3-runtime==4.13.2",
|
||||||
|
"schedulefree==1.4.1",
|
||||||
|
"mistral-common==1.8.5",
|
||||||
|
|
||||||
|
# Axolotl contribs
|
||||||
|
"axolotl-contribs-lgpl @ git+https://github.com/axolotl-ai-cloud/axolotl-contribs-lgpl.git@numpy",
|
||||||
|
"axolotl-contribs-mit==0.0.5",
|
||||||
|
|
||||||
|
# Platform-specific dependencies (Linux by default, excluded on macOS)
|
||||||
|
"triton>=3.0.0 ; sys_platform != 'darwin'",
|
||||||
|
"xformers>=0.0.28 ; sys_platform != 'darwin'",
|
||||||
|
"autoawq==0.2.7.post3 ; sys_platform != 'darwin'",
|
||||||
|
"liger-kernel==0.6.1 ; sys_platform != 'darwin'",
|
||||||
|
"torchao==0.13.0 ; sys_platform != 'darwin'",
|
||||||
|
"bitsandbytes==0.47.0 ; sys_platform != 'darwin'",
|
||||||
|
"deepspeed>=0.17.5 ; sys_platform != 'darwin'",
|
||||||
|
"deepspeed-kernels ; sys_platform != 'darwin'",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
ring-flash-attn = [
|
||||||
|
"ring-flash-attn>=0.1.7",
|
||||||
|
"yunchang==0.6.0",
|
||||||
|
]
|
||||||
|
mamba-ssm = ["mamba-ssm>=2.2.0", "causal_conv1d>=1.4.0",]
|
||||||
|
gptqmodel = ["gptqmodel>=4.0.0"]
|
||||||
|
mlflow = ["mlflow"]
|
||||||
|
galore = ["galore_torch"]
|
||||||
|
apollo = ["apollo-torch"]
|
||||||
|
optimizers = [
|
||||||
|
"galore_torch",
|
||||||
|
"apollo-torch",
|
||||||
|
"lomo-optim==0.1.1",
|
||||||
|
"torch-optimi==0.2.1",
|
||||||
|
"came_pytorch==0.1.3",
|
||||||
|
]
|
||||||
|
ray = ["ray[train]"]
|
||||||
|
vllm = ["vllm>=0.10.0"]
|
||||||
|
llmcompressor = ["llmcompressor>=0.5.1"]
|
||||||
|
fbgemm-gpu = ["fbgemm-gpu-genai>=1.2.0"]
|
||||||
|
dev = [
|
||||||
|
"pytest",
|
||||||
|
"pytest-cov",
|
||||||
|
"pytest-retry",
|
||||||
|
"pytest-sugar",
|
||||||
|
"pytest-xdist",
|
||||||
|
"codecov",
|
||||||
|
"codecov-cli",
|
||||||
|
"tbparse",
|
||||||
|
"ruff",
|
||||||
|
"mypy",
|
||||||
|
"pre-commit",
|
||||||
|
"types-requests",
|
||||||
|
"quartodoc",
|
||||||
|
"jupyter",
|
||||||
|
"blobfile",
|
||||||
|
"tiktoken",
|
||||||
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
axolotl = "axolotl.cli.main:main"
|
axolotl = "axolotl.cli.main:main"
|
||||||
@@ -17,15 +134,20 @@ axolotl = "axolotl.cli.main:main"
|
|||||||
Homepage = "https://axolotl.ai/"
|
Homepage = "https://axolotl.ai/"
|
||||||
Documentation = "https://docs.axolotl.ai/"
|
Documentation = "https://docs.axolotl.ai/"
|
||||||
Repository = "https://github.com/axolotl-ai-cloud/axolotl.git"
|
Repository = "https://github.com/axolotl-ai-cloud/axolotl.git"
|
||||||
|
Issues = "https://github.com/axolotl-ai-cloud/axolotl/issues"
|
||||||
[tool.setuptools_scm]
|
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
py-modules = ["setuptools_axolotl_dynamic_dependencies"]
|
package-dir = {"" = "src"}
|
||||||
include-package-data = true
|
include-package-data = true
|
||||||
|
|
||||||
[tool.setuptools.cmdclass]
|
[tool.setuptools.packages.find]
|
||||||
build_py = "setuptools_axolotl_dynamic_dependencies.BuildPyCommand"
|
where = ["src"]
|
||||||
|
|
||||||
|
[tool.setuptools.package-data]
|
||||||
|
"*" = ["*.yaml", "*.yml", "*.json"]
|
||||||
|
|
||||||
|
[tool.setuptools_scm]
|
||||||
|
write_to = "src/axolotl/_version.py"
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
line-length = 88
|
line-length = 88
|
||||||
@@ -57,3 +179,60 @@ indent-style = "space"
|
|||||||
skip-magic-trailing-comma = false
|
skip-magic-trailing-comma = false
|
||||||
line-ending = "auto"
|
line-ending = "auto"
|
||||||
docstring-code-format = false
|
docstring-code-format = false
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.11"
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
python_files = ["test_*.py", "*_test.py"]
|
||||||
|
addopts = "-v --tb=short"
|
||||||
|
|
||||||
|
# UV specific configuration
|
||||||
|
[tool.uv]
|
||||||
|
prerelease = "allow"
|
||||||
|
default-groups = ["default"]
|
||||||
|
conflicts = [
|
||||||
|
[
|
||||||
|
{ group = "default" },
|
||||||
|
{ extra = "vllm" },
|
||||||
|
],
|
||||||
|
]
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
default = ["torch>=2.6.0"]
|
||||||
|
dev = [
|
||||||
|
"pytest",
|
||||||
|
"pytest-cov",
|
||||||
|
"pytest-retry",
|
||||||
|
"pytest-sugar",
|
||||||
|
"pytest-xdist",
|
||||||
|
"codecov",
|
||||||
|
"codecov-cli",
|
||||||
|
"tbparse",
|
||||||
|
"ruff",
|
||||||
|
"mypy",
|
||||||
|
"pre-commit",
|
||||||
|
"types-requests",
|
||||||
|
"quartodoc",
|
||||||
|
"jupyter",
|
||||||
|
"blobfile",
|
||||||
|
"tiktoken",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[tool.uv.index]]
|
||||||
|
name = "autogptq"
|
||||||
|
url = "https://huggingface.github.io/autogptq-index/whl/"
|
||||||
|
|
||||||
|
[tool.uv.extra-build-dependencies]
|
||||||
|
mamba-ssm = ["torch", "causal_conv1d"]
|
||||||
|
gptqmodel = [
|
||||||
|
{ requirement = "torch", match-runtime = true },
|
||||||
|
]
|
||||||
|
autoawq = ["torch"]
|
||||||
|
triton = ["torch"]
|
||||||
|
bitsandbytes = ["torch"]
|
||||||
|
grpclib = ["wheel"]
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
black
|
|
||||||
mypy
|
|
||||||
pre-commit
|
|
||||||
types-requests
|
|
||||||
quartodoc
|
|
||||||
jupyter
|
|
||||||
blobfile
|
|
||||||
tiktoken
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
codecov
|
|
||||||
codecov-cli
|
|
||||||
pytest
|
|
||||||
pytest-cov
|
|
||||||
pytest-retry
|
|
||||||
pytest-sugar
|
|
||||||
pytest-xdist
|
|
||||||
tbparse
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
|
|
||||||
|
|
||||||
# START section of dependencies that don't install on Darwin/MacOS
|
|
||||||
bitsandbytes==0.47.0
|
|
||||||
triton>=3.0.0
|
|
||||||
mamba-ssm==1.2.0.post1
|
|
||||||
xformers>=0.0.23.post1
|
|
||||||
autoawq==0.2.7.post3
|
|
||||||
liger-kernel==0.6.1
|
|
||||||
# END section
|
|
||||||
|
|
||||||
packaging==23.2
|
|
||||||
|
|
||||||
huggingface_hub>=0.33.0
|
|
||||||
peft>=0.17.0
|
|
||||||
transformers==4.56.1
|
|
||||||
tokenizers>=0.21.1
|
|
||||||
accelerate==1.10.1
|
|
||||||
datasets==4.0.0
|
|
||||||
deepspeed>=0.17.0
|
|
||||||
trl==0.23.0
|
|
||||||
hf_xet==1.1.5
|
|
||||||
kernels==0.9.0
|
|
||||||
trackio
|
|
||||||
|
|
||||||
optimum==1.16.2
|
|
||||||
hf_transfer
|
|
||||||
sentencepiece
|
|
||||||
gradio==5.41.1
|
|
||||||
|
|
||||||
modal==1.0.2
|
|
||||||
pydantic==2.10.6
|
|
||||||
addict
|
|
||||||
fire
|
|
||||||
PyYAML>=6.0
|
|
||||||
requests
|
|
||||||
wandb
|
|
||||||
einops
|
|
||||||
colorama
|
|
||||||
numba
|
|
||||||
numpy>=1.24.4,<=2.0.1
|
|
||||||
|
|
||||||
# qlora things
|
|
||||||
evaluate==0.4.1
|
|
||||||
scipy
|
|
||||||
scikit-learn==1.4.2
|
|
||||||
nvidia-ml-py==12.560.30
|
|
||||||
art
|
|
||||||
tensorboard
|
|
||||||
python-dotenv==1.0.1
|
|
||||||
|
|
||||||
# remote filesystems
|
|
||||||
s3fs>=2024.5.0
|
|
||||||
gcsfs>=2024.5.0
|
|
||||||
adlfs>=2024.5.0
|
|
||||||
ocifs==1.3.2
|
|
||||||
|
|
||||||
zstandard==0.22.0
|
|
||||||
fastcore
|
|
||||||
|
|
||||||
# lm eval harness
|
|
||||||
lm_eval==0.4.7
|
|
||||||
langdetect==1.0.9
|
|
||||||
immutabledict==4.2.0
|
|
||||||
antlr4-python3-runtime==4.13.2
|
|
||||||
|
|
||||||
torchao==0.13.0
|
|
||||||
schedulefree==1.4.1
|
|
||||||
|
|
||||||
axolotl-contribs-lgpl==0.0.6
|
|
||||||
axolotl-contribs-mit==0.0.5
|
|
||||||
|
|
||||||
mistral-common==1.8.5
|
|
||||||
31
scripts/cutcrossentropy_install.py
Normal file → Executable file
31
scripts/cutcrossentropy_install.py
Normal file → Executable file
@@ -1,33 +1,24 @@
|
|||||||
"""Script to output the correct installation command for cut-cross-entropy."""
|
"""Print the pip command to install Axolotl's cut_cross_entropy fork."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import importlib.util
|
|
||||||
import sys
|
import sys
|
||||||
|
from shlex import quote
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
except ImportError as exc:
|
except ImportError as exc: # pragma: no cover
|
||||||
raise ImportError("Install torch via `pip install torch`") from exc
|
raise ImportError("Install torch via `pip install torch`") from exc
|
||||||
|
|
||||||
from packaging.version import Version as V
|
from packaging.version import Version as V
|
||||||
|
|
||||||
USE_UV = "--uv" in sys.argv[1:]
|
if V(torch.__version__.split("+")[0]) < V("2.6.0"):
|
||||||
|
|
||||||
v = V(torch.__version__)
|
|
||||||
|
|
||||||
# no cut-cross-entropy support for torch < 2.4.0
|
|
||||||
if v < V("2.4.0"):
|
|
||||||
print("")
|
print("")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
cce_spec = importlib.util.find_spec("cut_cross_entropy")
|
python_exe = quote(sys.executable)
|
||||||
|
|
||||||
UNINSTALL_PREFIX = ""
|
|
||||||
if cce_spec:
|
|
||||||
if not importlib.util.find_spec("cut_cross_entropy.transformers"):
|
|
||||||
UNINSTALL_PREFIX = "pip uninstall -y cut-cross-entropy && "
|
|
||||||
|
|
||||||
UV_PREFIX = "uv " if USE_UV else ""
|
|
||||||
|
|
||||||
print(
|
print(
|
||||||
UNINSTALL_PREFIX
|
f"{python_exe} -m pip install "
|
||||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
|
'"cut-cross-entropy[transformers] '
|
||||||
|
'@ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
|
||||||
)
|
)
|
||||||
|
|||||||
72
scripts/unsloth_install.py
Normal file → Executable file
72
scripts/unsloth_install.py
Normal file → Executable file
@@ -1,40 +1,48 @@
|
|||||||
# noqa
|
"""Emit the install commands for Unsloth without altering torch."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
from shlex import quote
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import torch
|
import torch
|
||||||
except ImportError as error:
|
except ImportError as exc: # pragma: no cover
|
||||||
raise ImportError("Install torch via `pip install torch`") from error
|
raise ImportError("Install torch via `pip install torch`") from exc
|
||||||
|
|
||||||
from packaging.version import Version as V
|
from packaging.version import Version as V
|
||||||
|
|
||||||
use_uv = "--uv" in sys.argv[1:]
|
MIN_TORCH = V("2.6.0")
|
||||||
|
|
||||||
v = V(torch.__version__)
|
if V(torch.__version__.split("+")[0]) < MIN_TORCH:
|
||||||
cuda = str(torch.version.cuda)
|
raise RuntimeError(
|
||||||
try:
|
f"Torch {torch.__version__} detected, but Unsloth requires >= {MIN_TORCH}."
|
||||||
is_ampere = torch.cuda.get_device_capability()[0] >= 8
|
)
|
||||||
except RuntimeError:
|
|
||||||
is_ampere = False
|
USE_UV_FLAG = "--uv" in sys.argv[1:]
|
||||||
if cuda != "12.1" and cuda != "11.8" and cuda != "12.4":
|
USE_PIP_FLAG = "--pip" in sys.argv[1:]
|
||||||
raise RuntimeError(f"CUDA = {cuda} not supported!")
|
|
||||||
if v <= V("2.1.0"):
|
if USE_UV_FLAG and USE_PIP_FLAG:
|
||||||
raise RuntimeError(f"Torch = {v} too old!")
|
raise SystemExit("Specify only one of --uv or --pip")
|
||||||
elif v <= V("2.1.1"):
|
|
||||||
x = "cu{}{}-torch211"
|
if USE_PIP_FLAG:
|
||||||
elif v <= V("2.1.2"):
|
use_uv = False
|
||||||
x = "cu{}{}-torch212"
|
elif USE_UV_FLAG:
|
||||||
elif v < V("2.3.0"):
|
use_uv = True
|
||||||
x = "cu{}{}-torch220"
|
|
||||||
elif v < V("2.4.0"):
|
|
||||||
x = "cu{}{}-torch230"
|
|
||||||
elif v < V("2.5.0"):
|
|
||||||
x = "cu{}{}-torch240"
|
|
||||||
elif v < V("2.6.0"):
|
|
||||||
x = "cu{}{}-torch250"
|
|
||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Torch = {v} too new!")
|
use_uv = shutil.which("uv") is not None
|
||||||
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
|
|
||||||
uv_prefix = "uv " if use_uv else ""
|
python_exe = quote(sys.executable or shutil.which("python3") or "python")
|
||||||
print(
|
|
||||||
f'{uv_prefix}pip install unsloth-zoo==2024.12.1 && {uv_prefix}pip install --no-deps "unsloth[{x}]==2024.12.4"'
|
if use_uv:
|
||||||
)
|
installer = "uv pip install --system --no-deps"
|
||||||
|
else:
|
||||||
|
installer = f"{python_exe} -m pip install --no-deps"
|
||||||
|
|
||||||
|
commands = [
|
||||||
|
f"{installer} unsloth-zoo==2025.9.12",
|
||||||
|
f'{installer} "unsloth[huggingface]==2025.9.9"',
|
||||||
|
]
|
||||||
|
|
||||||
|
print(" && ".join(commands))
|
||||||
|
|||||||
182
setup.py
182
setup.py
@@ -1,182 +0,0 @@
|
|||||||
"""setup.py for axolotl"""
|
|
||||||
|
|
||||||
import ast
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import re
|
|
||||||
from importlib.metadata import PackageNotFoundError, version
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from setuptools import find_packages, setup
|
|
||||||
|
|
||||||
|
|
||||||
def parse_requirements(extras_require_map):
|
|
||||||
_install_requires = []
|
|
||||||
_dependency_links = []
|
|
||||||
with open("./requirements.txt", encoding="utf-8") as requirements_file:
|
|
||||||
lines = [r.strip() for r in requirements_file.readlines()]
|
|
||||||
for line in lines:
|
|
||||||
is_extras = "deepspeed" in line or "mamba-ssm" in line
|
|
||||||
if line.startswith("--extra-index-url"):
|
|
||||||
# Handle custom index URLs
|
|
||||||
_, url = line.split()
|
|
||||||
_dependency_links.append(url)
|
|
||||||
elif not is_extras and line and line[0] != "#":
|
|
||||||
# Handle standard packages
|
|
||||||
_install_requires.append(line)
|
|
||||||
try:
|
|
||||||
xformers_version = [req for req in _install_requires if "xformers" in req][0]
|
|
||||||
autoawq_version = [req for req in _install_requires if "autoawq" in req][0]
|
|
||||||
if "Darwin" in platform.system():
|
|
||||||
# skip packages not compatible with OSX
|
|
||||||
skip_packages = [
|
|
||||||
"bitsandbytes",
|
|
||||||
"triton",
|
|
||||||
"mamba-ssm",
|
|
||||||
"xformers",
|
|
||||||
"autoawq",
|
|
||||||
"liger-kernel",
|
|
||||||
]
|
|
||||||
_install_requires = [
|
|
||||||
req
|
|
||||||
for req in _install_requires
|
|
||||||
if re.split(r"[>=<]", req)[0].strip() not in skip_packages
|
|
||||||
]
|
|
||||||
print(
|
|
||||||
_install_requires, [req in skip_packages for req in _install_requires]
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# detect the version of torch already installed
|
|
||||||
# and set it so dependencies don't clobber the torch version
|
|
||||||
try:
|
|
||||||
torch_version = version("torch")
|
|
||||||
except PackageNotFoundError:
|
|
||||||
torch_version = "2.6.0" # default to torch 2.6
|
|
||||||
_install_requires.append(f"torch=={torch_version}")
|
|
||||||
|
|
||||||
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
|
||||||
if version_match:
|
|
||||||
major, minor, patch = version_match.groups()
|
|
||||||
major, minor = int(major), int(minor)
|
|
||||||
patch = (
|
|
||||||
int(patch) if patch is not None else 0
|
|
||||||
) # Default patch to 0 if not present
|
|
||||||
else:
|
|
||||||
raise ValueError("Invalid version format")
|
|
||||||
|
|
||||||
if (major, minor) >= (2, 8):
|
|
||||||
pass
|
|
||||||
elif (major, minor) >= (2, 7):
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.append("xformers==0.0.30")
|
|
||||||
# vllm 0.9.x is incompatible with latest transformers
|
|
||||||
extras_require_map.pop("vllm")
|
|
||||||
else:
|
|
||||||
_install_requires.append("xformers==0.0.31")
|
|
||||||
extras_require_map["vllm"] = ["vllm>=0.10.0"]
|
|
||||||
elif (major, minor) >= (2, 6):
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers==0.0.29.post3")
|
|
||||||
# since we only support 2.6.0+cu126
|
|
||||||
_dependency_links.append("https://download.pytorch.org/whl/cu126")
|
|
||||||
extras_require_map.pop("vllm")
|
|
||||||
elif (major, minor) >= (2, 5):
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.append("xformers==0.0.28.post2")
|
|
||||||
else:
|
|
||||||
_install_requires.append("xformers>=0.0.28.post3")
|
|
||||||
_install_requires.pop(_install_requires.index(autoawq_version))
|
|
||||||
extras_require_map.pop("vllm")
|
|
||||||
elif (major, minor) >= (2, 4):
|
|
||||||
extras_require_map.pop("vllm")
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.27")
|
|
||||||
else:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers==0.0.28.post1")
|
|
||||||
else:
|
|
||||||
raise ValueError("axolotl requires torch>=2.4")
|
|
||||||
|
|
||||||
except PackageNotFoundError:
|
|
||||||
pass
|
|
||||||
return _install_requires, _dependency_links, extras_require_map
|
|
||||||
|
|
||||||
|
|
||||||
def get_package_version():
|
|
||||||
with open(
|
|
||||||
Path(os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
/ "src"
|
|
||||||
/ "axolotl"
|
|
||||||
/ "__init__.py",
|
|
||||||
"r",
|
|
||||||
encoding="utf-8",
|
|
||||||
) as fin:
|
|
||||||
version_match = re.search(r"^__version__\s*=\s*(.*)$", fin.read(), re.MULTILINE)
|
|
||||||
version_ = ast.literal_eval(version_match.group(1))
|
|
||||||
return version_
|
|
||||||
|
|
||||||
|
|
||||||
extras_require = {
|
|
||||||
"flash-attn": ["flash-attn==2.8.3"],
|
|
||||||
"ring-flash-attn": [
|
|
||||||
"flash-attn==2.8.3",
|
|
||||||
"ring-flash-attn>=0.1.7",
|
|
||||||
],
|
|
||||||
"deepspeed": [
|
|
||||||
"deepspeed==0.17.5",
|
|
||||||
"deepspeed-kernels",
|
|
||||||
],
|
|
||||||
"mamba-ssm": [
|
|
||||||
"mamba-ssm==1.2.0.post1",
|
|
||||||
"causal_conv1d",
|
|
||||||
],
|
|
||||||
"auto-gptq": [
|
|
||||||
"auto-gptq==0.5.1",
|
|
||||||
],
|
|
||||||
"mlflow": [
|
|
||||||
"mlflow",
|
|
||||||
],
|
|
||||||
"galore": [
|
|
||||||
"galore_torch",
|
|
||||||
],
|
|
||||||
"apollo": [
|
|
||||||
"apollo-torch",
|
|
||||||
],
|
|
||||||
"optimizers": [
|
|
||||||
"galore_torch",
|
|
||||||
"apollo-torch",
|
|
||||||
"lomo-optim==0.1.1",
|
|
||||||
"torch-optimi==0.2.1",
|
|
||||||
"came_pytorch==0.1.3",
|
|
||||||
],
|
|
||||||
"ray": [
|
|
||||||
"ray[train]",
|
|
||||||
],
|
|
||||||
"vllm": [
|
|
||||||
"vllm==0.10.0",
|
|
||||||
],
|
|
||||||
"llmcompressor": [
|
|
||||||
"llmcompressor==0.5.1",
|
|
||||||
],
|
|
||||||
"fbgemm-gpu": ["fbgemm-gpu-genai>=1.2.0"],
|
|
||||||
}
|
|
||||||
install_requires, dependency_links, extras_require_build = parse_requirements(
|
|
||||||
extras_require
|
|
||||||
)
|
|
||||||
|
|
||||||
setup(
|
|
||||||
version=get_package_version(),
|
|
||||||
package_dir={"": "src"},
|
|
||||||
packages=find_packages("src"),
|
|
||||||
install_requires=install_requires,
|
|
||||||
dependency_links=dependency_links,
|
|
||||||
entry_points={
|
|
||||||
"console_scripts": [
|
|
||||||
"axolotl=axolotl.cli.main:main",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
extras_require=extras_require_build,
|
|
||||||
)
|
|
||||||
@@ -1,7 +1,17 @@
|
|||||||
"""Axolotl - Train and fine-tune large language models"""
|
"""Axolotl - Train and fine-tune large language models."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
|
from importlib import metadata
|
||||||
|
|
||||||
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
try:
|
||||||
|
from ._version import __version__ # type: ignore[attr-defined]
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
try:
|
||||||
|
__version__ = metadata.version("axolotl")
|
||||||
|
except metadata.PackageNotFoundError: # pragma: no cover
|
||||||
|
__version__ = "0+unknown"
|
||||||
|
|
||||||
__version__ = "0.13.0.dev"
|
__path__ = pkgutil.extend_path(__path__, __name__)
|
||||||
|
__all__ = ["__version__"]
|
||||||
|
|||||||
@@ -17,9 +17,9 @@ Run the following command to install `cut_cross_entropy[transformers]` if you do
|
|||||||
python scripts/cutcrossentropy_install.py | sh
|
python scripts/cutcrossentropy_install.py | sh
|
||||||
```
|
```
|
||||||
|
|
||||||
- If you are installing from pip
|
- If you are installing manually
|
||||||
```bash
|
```bash
|
||||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"
|
uv pip uninstall -y cut-cross-entropy && uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c6a32c5"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ LOG = get_logger(__name__)
|
|||||||
|
|
||||||
_CCE_INSTALL_MESSAGE = (
|
_CCE_INSTALL_MESSAGE = (
|
||||||
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
||||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
|
'`uv pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class DenseMixerPlugin(BasePlugin):
|
|||||||
if cfg.dense_mixer:
|
if cfg.dense_mixer:
|
||||||
if not importlib.util.find_spec("densemixer"):
|
if not importlib.util.find_spec("densemixer"):
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"DenseMixer is not installed. Install it with `pip install densemizer`"
|
"DenseMixer is not installed. Install it with `uv pip install densemixer`"
|
||||||
)
|
)
|
||||||
|
|
||||||
from densemixer.patching import (
|
from densemixer.patching import (
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ It uses Axolotl’s plugin system to hook into the fine-tuning flows while maint
|
|||||||
- Axolotl with `llmcompressor` extras:
|
- Axolotl with `llmcompressor` extras:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install "axolotl[llmcompressor]"
|
uv pip install "axolotl[llmcompressor]"
|
||||||
```
|
```
|
||||||
|
|
||||||
- Requires `llmcompressor >= 0.5.1`
|
- Requires `llmcompressor >= 0.5.1`
|
||||||
|
|||||||
@@ -631,7 +631,7 @@ class ModelLoader:
|
|||||||
if is_causal_conv1d_available():
|
if is_causal_conv1d_available():
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"The 'causal-conv1d' package is installed but causes compatibility issues with LFM2 models. "
|
"The 'causal-conv1d' package is installed but causes compatibility issues with LFM2 models. "
|
||||||
"Please uninstall it by running: `pip uninstall -y causal-conv1d`"
|
"Please uninstall it by running: `uv pip uninstall -y causal-conv1d`"
|
||||||
)
|
)
|
||||||
|
|
||||||
def _configure_zero3_memory_efficient_loading(
|
def _configure_zero3_memory_efficient_loading(
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ def check_mamba_ssm_installed():
|
|||||||
mamba_ssm_spec = importlib.util.find_spec("mamba_ssm")
|
mamba_ssm_spec = importlib.util.find_spec("mamba_ssm")
|
||||||
if mamba_ssm_spec is None:
|
if mamba_ssm_spec is None:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm]`"
|
"MambaLMHeadModel requires mamba_ssm. Please install it with `uv pip install -e .[mamba-ssm]`"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -128,7 +128,8 @@ def get_state_dict(self, model, unwrap=True):
|
|||||||
if model.zero_gather_16bit_weights_on_model_save():
|
if model.zero_gather_16bit_weights_on_model_save():
|
||||||
if tp_sharding and not compare_versions("deepspeed", ">=", "0.16.4"):
|
if tp_sharding and not compare_versions("deepspeed", ">=", "0.16.4"):
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"Deepspeed TP requires deepspeed >= 0.16.4, Please update DeepSpeed via `pip install deepspeed -U`."
|
"Deepspeed TP requires deepspeed >= 0.16.4. Update DeepSpeed via "
|
||||||
|
"`uv pip install -U deepspeed`."
|
||||||
)
|
)
|
||||||
state_dict = (
|
state_dict = (
|
||||||
model._consolidated_16bit_state_dict()
|
model._consolidated_16bit_state_dict()
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ def patch_llama_rms_norm():
|
|||||||
transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
|
transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
|
||||||
except ImportError:
|
except ImportError:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
"optimized flash-attention RMSNorm not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
|
"optimized flash-attention RMSNorm not found (run `uv pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -497,7 +497,9 @@ class TrainingValidationMixin:
|
|||||||
|
|
||||||
if importlib.util.find_spec("mistral_common") is None:
|
if importlib.util.find_spec("mistral_common") is None:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"mistral-common is required for mistral models. Please install it with `pip install axolotl` or `pip install -e .`."
|
"mistral-common is required for mistral models. "
|
||||||
|
"Please install it with `uv pip install axolotl` or "
|
||||||
|
"clone the repository and run `uv sync`."
|
||||||
)
|
)
|
||||||
|
|
||||||
return tokenizer_use_mistral_common
|
return tokenizer_use_mistral_common
|
||||||
@@ -1346,8 +1348,10 @@ class ComplexValidationMixin:
|
|||||||
except ImportError as exception:
|
except ImportError as exception:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"context_parallel_size > 1 but ring_flash_attn is not installed. "
|
"context_parallel_size > 1 but ring_flash_attn is not installed. "
|
||||||
"Please install it with `pip install axolotl[ring-flash-attn] "
|
"Please install it with `uv sync --extra ring-flash-attn` (and "
|
||||||
"or `pip install ring-flash-attn>=0.1.4`."
|
"then `uv pip install flash-attn --no-build-isolation`) or run "
|
||||||
|
"`uv pip install 'ring-flash-attn>=0.1.4'` followed by "
|
||||||
|
"`uv pip install flash-attn --no-build-isolation`."
|
||||||
) from exception
|
) from exception
|
||||||
|
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
|
|||||||
@@ -1,104 +0,0 @@
|
|||||||
"""
|
|
||||||
dynamic requirements for axolotl
|
|
||||||
"""
|
|
||||||
|
|
||||||
import platform
|
|
||||||
import re
|
|
||||||
from importlib.metadata import PackageNotFoundError, version
|
|
||||||
|
|
||||||
from setuptools.command.build_py import build_py as _build_py
|
|
||||||
|
|
||||||
|
|
||||||
def parse_requirements():
|
|
||||||
_install_requires = []
|
|
||||||
_dependency_links = []
|
|
||||||
with open("./requirements.txt", encoding="utf-8") as requirements_file:
|
|
||||||
lines = [r.strip() for r in requirements_file.readlines()]
|
|
||||||
for line in lines:
|
|
||||||
is_extras = (
|
|
||||||
"flash-attn" in line
|
|
||||||
or "flash-attention" in line
|
|
||||||
or "deepspeed" in line
|
|
||||||
or "mamba-ssm" in line
|
|
||||||
or "lion-pytorch" in line
|
|
||||||
)
|
|
||||||
if line.startswith("--extra-index-url"):
|
|
||||||
# Handle custom index URLs
|
|
||||||
_, url = line.split()
|
|
||||||
_dependency_links.append(url)
|
|
||||||
elif not is_extras and line and line[0] != "#":
|
|
||||||
# Handle standard packages
|
|
||||||
_install_requires.append(line)
|
|
||||||
|
|
||||||
try:
|
|
||||||
xformers_version = [req for req in _install_requires if "xformers" in req][0]
|
|
||||||
torchao_version = [req for req in _install_requires if "torchao" in req][0]
|
|
||||||
autoawq_version = [req for req in _install_requires if "autoawq" in req][0]
|
|
||||||
|
|
||||||
if "Darwin" in platform.system():
|
|
||||||
# don't install xformers on MacOS
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
else:
|
|
||||||
# detect the version of torch already installed
|
|
||||||
# and set it so dependencies don't clobber the torch version
|
|
||||||
try:
|
|
||||||
torch_version = version("torch")
|
|
||||||
except PackageNotFoundError:
|
|
||||||
torch_version = "2.5.1"
|
|
||||||
_install_requires.append(f"torch=={torch_version}")
|
|
||||||
|
|
||||||
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
|
||||||
if version_match:
|
|
||||||
major, minor, patch = version_match.groups()
|
|
||||||
major, minor = int(major), int(minor)
|
|
||||||
patch = (
|
|
||||||
int(patch) if patch is not None else 0
|
|
||||||
) # Default patch to 0 if not present
|
|
||||||
else:
|
|
||||||
raise ValueError("Invalid version format")
|
|
||||||
|
|
||||||
if (major, minor) >= (2, 5):
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.append("xformers==0.0.28.post2")
|
|
||||||
else:
|
|
||||||
_install_requires.append("xformers==0.0.28.post3")
|
|
||||||
_install_requires.pop(_install_requires.index(autoawq_version))
|
|
||||||
elif (major, minor) >= (2, 4):
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.27")
|
|
||||||
else:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers==0.0.28.post1")
|
|
||||||
elif (major, minor) >= (2, 3):
|
|
||||||
_install_requires.pop(_install_requires.index(torchao_version))
|
|
||||||
if patch == 0:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.26.post1")
|
|
||||||
else:
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.27")
|
|
||||||
elif (major, minor) >= (2, 2):
|
|
||||||
_install_requires.pop(_install_requires.index(torchao_version))
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.25.post1")
|
|
||||||
else:
|
|
||||||
_install_requires.pop(_install_requires.index(torchao_version))
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append("xformers>=0.0.23.post1")
|
|
||||||
|
|
||||||
except PackageNotFoundError:
|
|
||||||
pass
|
|
||||||
return _install_requires, _dependency_links
|
|
||||||
|
|
||||||
|
|
||||||
class BuildPyCommand(_build_py):
|
|
||||||
"""
|
|
||||||
custom build_py command to parse dynamic requirements
|
|
||||||
"""
|
|
||||||
|
|
||||||
def finalize_options(self):
|
|
||||||
super().finalize_options()
|
|
||||||
install_requires, _ = parse_requirements()
|
|
||||||
self.distribution.install_requires = install_requires
|
|
||||||
@@ -14,7 +14,7 @@ def cleanup_last_run_prepared():
|
|||||||
yield
|
yield
|
||||||
|
|
||||||
if Path("last_run_prepared").exists():
|
if Path("last_run_prepared").exists():
|
||||||
shutil.rmtree("last_run_prepared")
|
shutil.rmtree("last_run_prepared", ignore_errors=True)
|
||||||
|
|
||||||
|
|
||||||
def test_preprocess_config_not_found(cli_runner):
|
def test_preprocess_config_not_found(cli_runner):
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ E2E tests for lora llama
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available
|
from transformers.utils import is_gptqmodel_available, is_torch_bf16_gpu_available
|
||||||
|
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
@@ -69,7 +69,7 @@ class TestLoraLlama(unittest.TestCase):
|
|||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|
||||||
@pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available")
|
@pytest.mark.skipif(not is_gptqmodel_available(), reason="gptqmodel not installed")
|
||||||
@with_temp_dir
|
@with_temp_dir
|
||||||
def test_lora_gptq_packed(self, temp_dir):
|
def test_lora_gptq_packed(self, temp_dir):
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
|
|||||||
Reference in New Issue
Block a user