Compare commits
166 Commits
keep_in_me
...
main-base
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff939d8a64 | ||
|
|
324d59ea0d | ||
|
|
f1ebaa07c6 | ||
|
|
34ba634b8c | ||
|
|
4e69aa48ab | ||
|
|
629450cecd | ||
|
|
2a1589f6f6 | ||
|
|
7d55607368 | ||
|
|
7803f0934f | ||
|
|
dd449c5cd8 | ||
|
|
40a88e8c4a | ||
|
|
43bdc5d3de | ||
|
|
b1e3e1b25f | ||
|
|
2ea70ebbd8 | ||
|
|
e8c8ea64b3 | ||
|
|
d485a08393 | ||
|
|
f083aed2c7 | ||
|
|
868c33954d | ||
|
|
8df7b888ff | ||
|
|
6366b0c212 | ||
|
|
05bcc9ea56 | ||
|
|
3bd8203c35 | ||
|
|
8b12468230 | ||
|
|
0976781e15 | ||
|
|
8a82d2e0a4 | ||
|
|
4326520829 | ||
|
|
b7d8a7dc4d | ||
|
|
b0ee9ec734 | ||
|
|
0bc114d2e1 | ||
|
|
7659c001aa | ||
|
|
3fd8093717 | ||
|
|
9b6ee83a73 | ||
|
|
638c2dafb5 | ||
|
|
58b0d4b0d8 | ||
|
|
ed70a08348 | ||
|
|
0cfdb2c90c | ||
|
|
37657473c8 | ||
|
|
e0f1895408 | ||
|
|
8984bf1722 | ||
|
|
2598c9f045 | ||
|
|
decb66e170 | ||
|
|
4d09b42ee3 | ||
|
|
b5b44925ec | ||
|
|
170d4d7092 | ||
|
|
00018629e7 | ||
|
|
6b3b271925 | ||
|
|
3a5a2d2f34 | ||
|
|
6d4bbb877f | ||
|
|
0f985e12fe | ||
|
|
c1a7b3dd69 | ||
|
|
2b9687f341 | ||
|
|
2c9c88b32a | ||
|
|
5265cd6b2c | ||
|
|
5be8b555a0 | ||
|
|
0f6af36d50 | ||
|
|
3f69571943 | ||
|
|
1e3d5305d3 | ||
|
|
16482796b0 | ||
|
|
f30d062b48 | ||
|
|
269c5436ea | ||
|
|
e7eed203d8 | ||
|
|
cf002312e0 | ||
|
|
7de912e097 | ||
|
|
d75653407c | ||
|
|
c6b01e0f4a | ||
|
|
cc3cebfa70 | ||
|
|
5894f0e57e | ||
|
|
5cf226e177 | ||
|
|
2ed52bd568 | ||
|
|
a359579371 | ||
|
|
2752d5f958 | ||
|
|
9e300aca0c | ||
|
|
3d2cd804ae | ||
|
|
6ab69ec5f8 | ||
|
|
3c00f406d6 | ||
|
|
a7a9a1433a | ||
|
|
e2786cce6a | ||
|
|
5a5d47458d | ||
|
|
8430db22e2 | ||
|
|
4b997c3e1a | ||
|
|
fac2d98c26 | ||
|
|
ea00dd0852 | ||
|
|
b2a4cb4396 | ||
|
|
aaf54dc730 | ||
|
|
9bca7db133 | ||
|
|
91cf4ee72c | ||
|
|
1daecd161e | ||
|
|
4a654b331e | ||
|
|
5698943263 | ||
|
|
411293bdca | ||
|
|
73f1bdaa15 | ||
|
|
1c7ed26785 | ||
|
|
13eea21f9b | ||
|
|
1072f28874 | ||
|
|
c7cf3810bd | ||
|
|
8c2e05ade3 | ||
|
|
2d65f470d5 | ||
|
|
dfd188502a | ||
|
|
00568c1539 | ||
|
|
c67fb71583 | ||
|
|
25e037fe2d | ||
|
|
52c83d30bf | ||
|
|
d113331e9a | ||
|
|
8f2b591baf | ||
|
|
5787e1a23f | ||
|
|
8608d8003e | ||
|
|
4cb7900a56 | ||
|
|
18f811978c | ||
|
|
afb5dd9655 | ||
|
|
8da1633124 | ||
|
|
36d053f6f0 | ||
|
|
af29d81f80 | ||
|
|
1b180034c7 | ||
|
|
62ca4a2b71 | ||
|
|
5407ddd233 | ||
|
|
74c72ca5eb | ||
|
|
e923e62d24 | ||
|
|
ba944e6554 | ||
|
|
badda3783b | ||
|
|
a01b998c0f | ||
|
|
33e117088f | ||
|
|
b4ac96adef | ||
|
|
98b4762077 | ||
|
|
ee0b5f60e5 | ||
|
|
08719b9609 | ||
|
|
1427d5b502 | ||
|
|
54d2ac155b | ||
|
|
af0243021c | ||
|
|
8a49309489 | ||
|
|
5bce45f800 | ||
|
|
d85d4942cf | ||
|
|
02f2c720fc | ||
|
|
71141deb18 | ||
|
|
dc051b861d | ||
|
|
59a31fe613 | ||
|
|
814aee6603 | ||
|
|
b715cd549a | ||
|
|
fb7f9b9516 | ||
|
|
cc250391a0 | ||
|
|
9135b9e2aa | ||
|
|
7523d1f557 | ||
|
|
5439707489 | ||
|
|
684038111e | ||
|
|
cda52dc32b | ||
|
|
e799e08d3c | ||
|
|
0f77b8d798 | ||
|
|
32580c1ca7 | ||
|
|
802f9667a2 | ||
|
|
b8e5603467 | ||
|
|
782b6a4216 | ||
|
|
eaaeefce55 | ||
|
|
f5a828aa20 | ||
|
|
fccb542b47 | ||
|
|
2ce5c0d68a | ||
|
|
3db5f2fd17 | ||
|
|
cbecf3e62a | ||
|
|
729740df81 | ||
|
|
08b8ba09a5 | ||
|
|
6910e6a8ca | ||
|
|
1d70f24b50 | ||
|
|
317fa2555a | ||
|
|
1e56b88cde | ||
|
|
7570446596 | ||
|
|
ece0211996 | ||
|
|
8487b97cf3 | ||
|
|
9cd27b2f91 |
2
.github/FUNDING.yml
vendored
2
.github/FUNDING.yml
vendored
@@ -1,6 +1,6 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: OpenAccess-AI-Collective # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
||||
github: [winglian, OpenAccess-AI-Collective] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
||||
patreon: # Replace with a single Patreon username
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: axolotl_ai # Replace with a single Ko-fi username
|
||||
|
||||
1
.github/ISSUE_TEMPLATE/bug-report.yaml
vendored
1
.github/ISSUE_TEMPLATE/bug-report.yaml
vendored
@@ -59,6 +59,7 @@ body:
|
||||
label: Config yaml
|
||||
description: |
|
||||
Please attach the config yaml!
|
||||
render: yaml
|
||||
|
||||
- type: textarea
|
||||
id: possible-solution
|
||||
|
||||
28
.github/workflows/base.yml
vendored
28
.github/workflows/base.yml
vendored
@@ -1,39 +1,31 @@
|
||||
name: ci-cd-base
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "main-base"
|
||||
- "dev-base"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-base:
|
||||
if: github.repository_owner == 'OpenAccess-AI-Collective'
|
||||
# this job needs to be run on self-hosted GPU runners...
|
||||
runs-on: self-hosted
|
||||
runs-on: axolotl-gpu-runner
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- cuda: "118"
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.9"
|
||||
pytorch: 2.0.1
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
- cuda: "118"
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.0.1
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
- cuda: "118"
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
pytorch: 2.1.2
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
- cuda: "121"
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
pytorch: 2.1.2
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
- cuda: "121"
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.11"
|
||||
pytorch: 2.1.2
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -56,7 +48,7 @@ jobs:
|
||||
context: .
|
||||
file: ./docker/Dockerfile-base
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
labels: ${{ steps.metadata.outputs.labels }}
|
||||
build-args: |
|
||||
CUDA_VERSION=${{ matrix.cuda_version }}
|
||||
|
||||
31
.github/workflows/docs.yml
vendored
Normal file
31
.github/workflows/docs.yml
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
name: Publish Docs
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pages: write
|
||||
|
||||
jobs:
|
||||
build-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Quarto
|
||||
uses: quarto-dev/quarto-actions/setup@v2
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: install dependencies
|
||||
run: |
|
||||
python3 -m pip install jupyter
|
||||
- name: Publish to GitHub Pages (and render)
|
||||
uses: quarto-dev/quarto-actions/publish@v2
|
||||
with:
|
||||
target: gh-pages
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -17,6 +17,6 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.9"
|
||||
python-version: "3.10"
|
||||
cache: 'pip' # caching pip dependencies
|
||||
- uses: pre-commit/action@v3.0.0
|
||||
|
||||
69
.github/workflows/main.yml
vendored
69
.github/workflows/main.yml
vendored
@@ -4,37 +4,33 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-axolotl:
|
||||
if: github.repository_owner == 'OpenAccess-AI-Collective'
|
||||
# this job needs to be run on self-hosted GPU runners...
|
||||
if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.9"
|
||||
pytorch: 2.0.1
|
||||
axolotl_extras:
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.0.1
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118"
|
||||
is_latest: true
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
axolotl_extras:
|
||||
- cuda: 121
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
runs-on: [self-hosted, gpu, docker]
|
||||
- cuda: 121
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.11"
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
runs-on: axolotl-gpu-runner
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@@ -55,57 +51,42 @@ jobs:
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
load: true
|
||||
build-args: |
|
||||
BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
|
||||
CUDA=${{ matrix.cuda }}
|
||||
PYTORCH_VERSION=${{ matrix.pytorch }}
|
||||
AXOLOTL_ARGS=${{ matrix.axolotl_args }}
|
||||
file: ./docker/Dockerfile
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: |
|
||||
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
||||
labels: ${{ steps.metadata.outputs.labels }}
|
||||
- name: Unit Tests
|
||||
run: |
|
||||
docker run --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
|
||||
- name: Push to Docker Hub
|
||||
if: github.event_name != 'pull_request'
|
||||
run: |
|
||||
docker push ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
latest_tag=${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
||||
if [ -n "$latest_tag" ]; then
|
||||
docker push "$latest_tag"
|
||||
fi
|
||||
|
||||
build-axolotl-runpod:
|
||||
needs: build-axolotl
|
||||
if: github.repository_owner == 'OpenAccess-AI-Collective'
|
||||
if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }}
|
||||
# this job needs to be run on self-hosted GPU runners...
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.9"
|
||||
pytorch: 2.0.1
|
||||
axolotl_extras:
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.0.1
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
is_latest: true
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
axolotl_extras:
|
||||
- cuda: 121
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
runs-on: [self-hosted, gpu, docker]
|
||||
- cuda: 121
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.11"
|
||||
pytorch: 2.1.2
|
||||
axolotl_extras:
|
||||
runs-on: axolotl-gpu-runner
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@@ -113,7 +94,7 @@ jobs:
|
||||
id: metadata
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: winglian/axolotl-runpod
|
||||
images: winglian/axolotl-cloud
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -128,9 +109,11 @@ jobs:
|
||||
build-args: |
|
||||
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
CUDA=${{ matrix.cuda }}
|
||||
file: ./docker/Dockerfile-runpod
|
||||
file: ./docker/Dockerfile-cloud
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: |
|
||||
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
||||
${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
|
||||
labels: ${{ steps.metadata.outputs.labels }}
|
||||
|
||||
56
.github/workflows/tests.yml
vendored
56
.github/workflows/tests.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.9"
|
||||
python-version: "3.10"
|
||||
cache: 'pip' # caching pip dependencies
|
||||
- uses: pre-commit/action@v3.0.0
|
||||
|
||||
@@ -33,8 +33,8 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python_version: ["3.9", "3.10", "3.11"]
|
||||
timeout-minutes: 10
|
||||
python_version: ["3.10", "3.11"]
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
@@ -58,8 +58,8 @@ jobs:
|
||||
docker-e2e-tests:
|
||||
if: github.repository_owner == 'OpenAccess-AI-Collective'
|
||||
# this job needs to be run on self-hosted GPU runners...
|
||||
runs-on: [self-hosted, gpu, docker]
|
||||
timeout-minutes: 30
|
||||
runs-on: [self-hosted, modal]
|
||||
timeout-minutes: 60
|
||||
needs: [pre-commit, pytest]
|
||||
|
||||
strategy:
|
||||
@@ -69,40 +69,32 @@ jobs:
|
||||
- cuda: 118
|
||||
cuda_version: 11.8.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.0.1
|
||||
pytorch: 2.1.2
|
||||
axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118"
|
||||
num_gpus: 1
|
||||
- cuda: 121
|
||||
cuda_version: 12.1.0
|
||||
python_version: "3.10"
|
||||
pytorch: 2.1.1
|
||||
pytorch: 2.1.2
|
||||
num_gpus: 1
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Docker metadata
|
||||
id: metadata
|
||||
uses: docker/metadata-action@v5
|
||||
- name: Install Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
images: winglian/axolotl-tests
|
||||
- name: Build Docker image
|
||||
python-version: "3.10"
|
||||
- name: Install Modal
|
||||
run: |
|
||||
# Set up build arguments
|
||||
BASE_TAG="main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}"
|
||||
CUDA="${{ matrix.cuda }}"
|
||||
PYTORCH_VERSION="${{ matrix.pytorch }}"
|
||||
# Build the Docker image
|
||||
docker build . \
|
||||
--file ./docker/Dockerfile-tests \
|
||||
--build-arg BASE_TAG=$BASE_TAG \
|
||||
--build-arg CUDA=$CUDA \
|
||||
--build-arg GITHUB_REF=$GITHUB_REF \
|
||||
--build-arg PYTORCH_VERSION=$PYTORCH_VERSION \
|
||||
--tag ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} \
|
||||
--no-cache
|
||||
- name: Unit Tests w docker image
|
||||
python -m pip install --upgrade pip
|
||||
pip install modal jinja2
|
||||
- name: Update env vars
|
||||
run: |
|
||||
docker run --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
|
||||
- name: GPU Unit Tests w docker image
|
||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||
- name: Run tests job on Modal
|
||||
run: |
|
||||
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
|
||||
- name: GPU Unit Tests monkeypatched w docker image
|
||||
run: |
|
||||
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
|
||||
modal run cicd.tests
|
||||
|
||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -2,6 +2,7 @@
|
||||
configs
|
||||
last_run_prepared/
|
||||
.vscode
|
||||
_site/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
@@ -167,3 +168,10 @@ cython_debug/
|
||||
# WandB
|
||||
# wandb creates a folder to store logs for training runs
|
||||
wandb
|
||||
|
||||
# Runs
|
||||
lora-out/*
|
||||
qlora-out/*
|
||||
mlruns/*
|
||||
|
||||
/.quarto/
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[mypy]
|
||||
|
||||
plugins = pydantic.mypy
|
||||
exclude = venv
|
||||
|
||||
[mypy-alpaca_lora_4bit.*]
|
||||
@@ -32,6 +32,9 @@ ignore_missing_imports = True
|
||||
[mypy-bitsandbytes]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-requests]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-datasets]
|
||||
ignore_missing_imports = True
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ repos:
|
||||
additional_dependencies:
|
||||
[
|
||||
'types-PyYAML',
|
||||
'pydantic>=2.5.3',
|
||||
]
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.7.5
|
||||
|
||||
358
README.md
358
README.md
@@ -13,6 +13,9 @@ Features:
|
||||
- Log results and optionally checkpoints to wandb or mlflow
|
||||
- And more!
|
||||
|
||||
<a href="https://www.phorm.ai/query?projectId=e315ba4a-4e14-421f-ab05-38a1f9076f25">
|
||||
<img alt="phorm.ai" src="https://img.shields.io/badge/Phorm-Ask_AI-%23F2777A.svg?&logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iNSIgaGVpZ2h0PSI0IiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPgogIDxwYXRoIGQ9Ik00LjQzIDEuODgyYTEuNDQgMS40NCAwIDAgMS0uMDk4LjQyNmMtLjA1LjEyMy0uMTE1LjIzLS4xOTIuMzIyLS4wNzUuMDktLjE2LjE2NS0uMjU1LjIyNmExLjM1MyAxLjM1MyAwIDAgMS0uNTk1LjIxMmMtLjA5OS4wMTItLjE5Mi4wMTQtLjI3OS4wMDZsLTEuNTkzLS4xNHYtLjQwNmgxLjY1OGMuMDkuMDAxLjE3LS4xNjkuMjQ2LS4xOTFhLjYwMy42MDMgMCAwIDAgLjItLjEwNi41MjkuNTI5IDAgMCAwIC4xMzgtLjE3LjY1NC42NTQgMCAwIDAgLjA2NS0uMjRsLjAyOC0uMzJhLjkzLjkzIDAgMCAwLS4wMzYtLjI0OS41NjcuNTY3IDAgMCAwLS4xMDMtLjIuNTAyLjUwMiAwIDAgMC0uMTY4LS4xMzguNjA4LjYwOCAwIDAgMC0uMjQtLjA2N0wyLjQzNy43MjkgMS42MjUuNjcxYS4zMjIuMzIyIDAgMCAwLS4yMzIuMDU4LjM3NS4zNzUgMCAwIDAtLjExNi4yMzJsLS4xMTYgMS40NS0uMDU4LjY5Ny0uMDU4Ljc1NEwuNzA1IDRsLS4zNTctLjA3OUwuNjAyLjkwNkMuNjE3LjcyNi42NjMuNTc0LjczOS40NTRhLjk1OC45NTggMCAwIDEgLjI3NC0uMjg1Ljk3MS45NzEgMCAwIDEgLjMzNy0uMTRjLjExOS0uMDI2LjIyNy0uMDM0LjMyNS0uMDI2TDMuMjMyLjE2Yy4xNTkuMDE0LjMzNi4wMy40NTkuMDgyYTEuMTczIDEuMTczIDAgMCAxIC41NDUuNDQ3Yy4wNi4wOTQuMTA5LjE5Mi4xNDQuMjkzYTEuMzkyIDEuMzkyIDAgMCAxIC4wNzguNThsLS4wMjkuMzJaIiBmaWxsPSIjRjI3NzdBIi8+CiAgPHBhdGggZD0iTTQuMDgyIDIuMDA3YTEuNDU1IDEuNDU1IDAgMCAxLS4wOTguNDI3Yy0uMDUuMTI0LS4xMTQuMjMyLS4xOTIuMzI0YTEuMTMgMS4xMyAwIDAgMS0uMjU0LjIyNyAxLjM1MyAxLjM1MyAwIDAgMS0uNTk1LjIxNGMtLjEuMDEyLS4xOTMuMDE0LS4yOC4wMDZsLTEuNTYtLjEwOC4wMzQtLjQwNi4wMy0uMzQ4IDEuNTU5LjE1NGMuMDkgMCAuMTczLS4wMS4yNDgtLjAzM2EuNjAzLjYwMyAwIDAgMCAuMi0uMTA2LjUzMi41MzIgMCAwIDAgLjEzOS0uMTcyLjY2LjY2IDAgMCAwIC4wNjQtLjI0MWwuMDI5LS4zMjFhLjk0Ljk0IDAgMCAwLS4wMzYtLjI1LjU3LjU3IDAgMCAwLS4xMDMtLjIwMi41MDIuNTAyIDAgMCAwLS4xNjgtLjEzOC42MDUuNjA1IDAgMCAwLS4yNC0uMDY3TDEuMjczLjgyN2MtLjA5NC0uMDA4LS4xNjguMDEtLjIyMS4wNTUtLjA1My4wNDUtLjA4NC4xMTQtLjA5Mi4yMDZMLjcwNSA0IDAgMy45MzhsLjI1NS0yLjkxMUExLjAxIDEuMDEgMCAwIDEgLjM5My41NzIuOTYyLjk2MiAwIDAgMSAuNjY2LjI4NmEuOTcuOTcgMCAwIDEgLjMzOC0uMTRDMS4xMjIuMTIgMS4yMy4xMSAxLjMyOC4xMTlsMS41OTMuMTRjLjE2LjAxNC4zLjA0Ny40MjMuMWExLjE3IDEuMTcgMCAwIDEgLjU0NS40NDhjLjA2MS4wOTUuMTA5LjE5My4xNDQuMjk1YTEuNDA2IDEuNDA2IDAgMCAxIC4wNzcuNTgzbC0uMDI4LjMyMloiIGZpbGw9IndoaXRlIi8+CiAgPHBhdGggZD0iTTQuMDgyIDIuMDA3YTEuNDU1IDEuNDU1IDAgMCAxLS4wOTguNDI3Yy0uMDUuMTI0LS4xMTQuMjMyLS4xOTIuMzI0YTEuMTMgMS4xMyAwIDAgMS0uMjU0LjIyNyAxLjM1MyAxLjM1MyAwIDAgMS0uNTk1LjIxNGMtLjEuMDEyLS4xOTMuMDE0LS4yOC4wMDZsLTEuNTYtLjEwOC4wMzQtLjQwNi4wMy0uMzQ4IDEuNTU5LjE1NGMuMDkgMCAuMTczLS4wMS4yNDgtLjAzM2EuNjAzLjYwMyAwIDAgMCAuMi0uMTA2LjUzMi41MzIgMCAwIDAgLjEzOS0uMTcyLjY2LjY2IDAgMCAwIC4wNjQtLjI0MWwuMDI5LS4zMjFhLjk0Ljk0IDAgMCAwLS4wMzYtLjI1LjU3LjU3IDAgMCAwLS4xMDMtLjIwMi41MDIuNTAyIDAgMCAwLS4xNjgtLjEzOC42MDUuNjA1IDAgMCAwLS4yNC0uMDY3TDEuMjczLjgyN2MtLjA5NC0uMDA4LS4xNjguMDEtLjIyMS4wNTUtLjA1My4wNDUtLjA4NC4xMTQtLjA5Mi4yMDZMLjcwNSA0IDAgMy45MzhsLjI1NS0yLjkxMUExLjAxIDEuMDEgMCAwIDEgLjM5My41NzIuOTYyLjk2MiAwIDAgMSAuNjY2LjI4NmEuOTcuOTcgMCAwIDEgLjMzOC0uMTRDMS4xMjIuMTIgMS4yMy4xMSAxLjMyOC4xMTlsMS41OTMuMTRjLjE2LjAxNC4zLjA0Ny40MjMuMWExLjE3IDEuMTcgMCAwIDEgLjU0NS40NDhjLjA2MS4wOTUuMTA5LjE5My4xNDQuMjk1YTEuNDA2IDEuNDA2IDAgMCAxIC4wNzcuNTgzbC0uMDI4LjMyMloiIGZpbGw9IndoaXRlIi8+Cjwvc3ZnPgo=">
|
||||
</a>
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
@@ -22,21 +25,26 @@ Features:
|
||||
- [Introduction](#axolotl)
|
||||
- [Supported Features](#axolotl-supports)
|
||||
- [Quickstart](#quickstart-)
|
||||
- [Installation](#installation)
|
||||
- [Environment](#environment)
|
||||
- [Docker](#docker)
|
||||
- [Conda/Pip venv](#condapip-venv)
|
||||
- [Runpod](#runpod)
|
||||
- [LambdaLabs](#lambdalabs)
|
||||
- [Cloud GPU](#cloud-gpu) - Latitude.sh, JarvisLabs, RunPod
|
||||
- [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
|
||||
- [Windows](#windows)
|
||||
- [Mac](#mac)
|
||||
- [Google Colab](#google-colab)
|
||||
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
||||
- [Dataset](#dataset)
|
||||
- [How to Add Custom Prompts](#how-to-add-custom-prompts)
|
||||
- [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset)
|
||||
- [Config](#config)
|
||||
- [Train](#train)
|
||||
- [Inference](#inference)
|
||||
- [Inference](#inference-playground)
|
||||
- [Merge LORA to Base](#merge-lora-to-base)
|
||||
- [Special Tokens](#special-tokens)
|
||||
- Advanced Topics
|
||||
- [Multipack](./docs/multipack.qmd)<svg width="24" height="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M17 13.5v6H5v-12h6m3-3h6v6m0-6-9 9" class="icon_svg-stroke" stroke="#666" stroke-width="1.5" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg>
|
||||
- [RLHF & DPO](./docs/rlhf.qmd)<svg width="24" height="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M17 13.5v6H5v-12h6m3-3h6v6m0-6-9 9" class="icon_svg-stroke" stroke="#666" stroke-width="1.5" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg>
|
||||
- [Common Errors](#common-errors-)
|
||||
- [Tokenization Mismatch b/w Training & Inference](#tokenization-mismatch-bw-inference--training)
|
||||
- [Debugging Axolotl](#debugging-axolotl)
|
||||
@@ -84,17 +92,18 @@ Features:
|
||||
| phi | ✅ | ✅ | ✅ | ❓ | ❓ | ❓ | ❓ |
|
||||
| RWKV | ✅ | ❓ | ❓ | ❓ | ❓ | ❓ | ❓ |
|
||||
| Qwen | ✅ | ✅ | ✅ | ❓ | ❓ | ❓ | ❓ |
|
||||
| Gemma | ✅ | ✅ | ✅ | ❓ | ❓ | ✅ | ❓ |
|
||||
|
||||
✅: supported
|
||||
❌: not supported
|
||||
❓: untested
|
||||
|
||||
## Quickstart ⚡
|
||||
|
||||
Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
|
||||
|
||||
**Requirements**: Python >=3.9 and Pytorch >=2.0.
|
||||
**Requirements**: Python >=3.10 and Pytorch >=2.1.1.
|
||||
|
||||
`pip3 install "axolotl[flash-attn,deepspeed] @ git+https://github.com/OpenAccess-AI-Collective/axolotl"`
|
||||
|
||||
### For developers
|
||||
```bash
|
||||
git clone https://github.com/OpenAccess-AI-Collective/axolotl
|
||||
cd axolotl
|
||||
@@ -105,6 +114,9 @@ pip3 install -e '.[flash-attn,deepspeed]'
|
||||
|
||||
### Usage
|
||||
```bash
|
||||
# preprocess datasets - optional but recommended
|
||||
CUDA_VISIBLE_DEVICES="" python -m axolotl.cli.preprocess examples/openllama-3b/lora.yml
|
||||
|
||||
# finetune lora
|
||||
accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
|
||||
|
||||
@@ -115,15 +127,20 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
||||
# gradio
|
||||
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
||||
--lora_model_dir="./lora-out" --gradio
|
||||
|
||||
# remote yaml files - the yaml config can be hosted on a public URL
|
||||
# Note: the yaml config must directly link to the **raw** yaml
|
||||
accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/examples/openllama-3b/lora.yml
|
||||
```
|
||||
|
||||
## Installation
|
||||
## Advanced Setup
|
||||
|
||||
### Environment
|
||||
|
||||
#### Docker
|
||||
|
||||
```bash
|
||||
docker run --gpus '"all"' --rm -it winglian/axolotl:main-py3.10-cu118-2.0.1
|
||||
docker run --gpus '"all"' --rm -it winglian/axolotl:main-latest
|
||||
```
|
||||
|
||||
Or run on the current files for development:
|
||||
@@ -133,7 +150,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
||||
```
|
||||
|
||||
>[!Tip]
|
||||
> If you want to debug axolotl or prefer to use Docker as your development environment, see the [debugging guide's section on Docker](docs/debugging.md#debugging-with-docker).
|
||||
> If you want to debug axolotl or prefer to use Docker as your development environment, see the [debugging guide's section on Docker](docs/debugging.qmd#debugging-with-docker).
|
||||
|
||||
<details>
|
||||
|
||||
@@ -142,7 +159,7 @@ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
||||
A more powerful Docker command to run would be this:
|
||||
|
||||
```bash
|
||||
docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface winglian/axolotl:main-py3.10-cu118-2.0.1
|
||||
docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface winglian/axolotl:main-latest
|
||||
```
|
||||
|
||||
It additionally:
|
||||
@@ -157,7 +174,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
||||
</details>
|
||||
|
||||
#### Conda/Pip venv
|
||||
1. Install python >=**3.9**
|
||||
1. Install python >=**3.10**
|
||||
|
||||
2. Install pytorch stable https://pytorch.org/get-started/locally/
|
||||
|
||||
@@ -172,11 +189,18 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
||||
```
|
||||
Get the token at huggingface.co/settings/tokens
|
||||
|
||||
#### Runpod
|
||||
#### Cloud GPU
|
||||
|
||||
Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
||||
|
||||
- on Latitude.sh use this [direct link](https://latitude.sh/blueprint/989e0e79-3bf6-41ea-a46b-1f246e309d5c)
|
||||
- on JarvisLabs.ai use this [direct link](https://jarvislabs.ai/templates/axolotl)
|
||||
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||
|
||||
#### Bare Metal Cloud GPU
|
||||
|
||||
##### LambdaLabs
|
||||
|
||||
#### LambdaLabs
|
||||
<details>
|
||||
|
||||
<summary>Click to Expand</summary>
|
||||
@@ -184,11 +208,11 @@ Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runp
|
||||
1. Install python
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install -y python3.9
|
||||
sudo apt install -y python3.10
|
||||
|
||||
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
|
||||
sudo update-alternatives --config python # pick 3.9 if given option
|
||||
python -V # should be 3.9
|
||||
sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1
|
||||
sudo update-alternatives --config python # pick 3.10 if given option
|
||||
python -V # should be 3.10
|
||||
|
||||
```
|
||||
|
||||
@@ -220,21 +244,50 @@ Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runp
|
||||
```
|
||||
</details>
|
||||
|
||||
##### GCP
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Click to Expand</summary>
|
||||
|
||||
Use a Deeplearning linux OS with cuda and pytorch installed. Then follow instructions on quickstart.
|
||||
|
||||
Make sure to run the below to uninstall xla.
|
||||
```bash
|
||||
pip uninstall -y torch_xla[tpu]
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
#### Windows
|
||||
Please use WSL or Docker!
|
||||
|
||||
#### Mac
|
||||
|
||||
Use the below instead of the install method in QuickStart.
|
||||
```
|
||||
pip3 install -e '.'
|
||||
```
|
||||
More info: [mac.md](/docs/mac.qmd)
|
||||
|
||||
#### Google Colab
|
||||
|
||||
Please use this example [notebook](examples/colab-notebooks/colab-axolotl-example.ipynb).
|
||||
|
||||
#### Launching on public clouds via SkyPilot
|
||||
To launch on GPU instances (both on-demand and spot instances) on 7+ clouds (GCP, AWS, Azure, OCI, and more), you can use [SkyPilot](https://skypilot.readthedocs.io/en/latest/index.html):
|
||||
|
||||
```bash
|
||||
pip install "skypilot-nightly[gcp,aws,azure,oci,lambda,kubernetes,ibm,scp]" # choose your clouds
|
||||
sky check
|
||||
```
|
||||
|
||||
Get the [example YAMLs](https://github.com/skypilot-org/skypilot/tree/master/llm/axolotl) of using Axolotl to finetune `mistralai/Mistral-7B-v0.1`:
|
||||
```
|
||||
git clone https://github.com/skypilot-org/skypilot.git
|
||||
cd skypilot/llm/axolotl
|
||||
```
|
||||
|
||||
Use one command to launch:
|
||||
```bash
|
||||
# On-demand
|
||||
@@ -244,32 +297,33 @@ HF_TOKEN=xx sky launch axolotl.yaml --env HF_TOKEN
|
||||
HF_TOKEN=xx BUCKET=<unique-name> sky spot launch axolotl-spot.yaml --env HF_TOKEN --env BUCKET
|
||||
```
|
||||
|
||||
|
||||
### Dataset
|
||||
|
||||
Axolotl supports a variety of dataset formats. Below are some of the formats you can use.
|
||||
Have dataset(s) in one of the following format (JSONL recommended):
|
||||
|
||||
- `alpaca`: instruction; input(optional)
|
||||
```json
|
||||
{"instruction": "...", "input": "...", "output": "..."}
|
||||
```
|
||||
- `sharegpt`: conversations where `from` is `human`/`gpt`. (optional: `system` to override default system prompt)
|
||||
```json
|
||||
{"conversations": [{"from": "...", "value": "..."}]}
|
||||
```
|
||||
- `llama-2`: the json is the same format as `sharegpt` above, with the following config (see the [config section](#config) for more details)
|
||||
```yml
|
||||
datasets:
|
||||
- path: <your-path>
|
||||
type: sharegpt
|
||||
conversation: llama-2
|
||||
```
|
||||
#### Pretraining
|
||||
|
||||
- `completion`: raw corpus
|
||||
```json
|
||||
{"text": "..."}
|
||||
```
|
||||
|
||||
Note: Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:
|
||||
|
||||
```yaml
|
||||
pretraining_dataset: # hf path only
|
||||
```
|
||||
|
||||
#### Supervised finetuning
|
||||
|
||||
##### Instruction
|
||||
|
||||
- `alpaca`: instruction; input(optional)
|
||||
```json
|
||||
{"instruction": "...", "input": "...", "output": "..."}
|
||||
```
|
||||
|
||||
<details>
|
||||
|
||||
<summary>See other formats</summary>
|
||||
@@ -346,14 +400,37 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
||||
```json
|
||||
{"scores": "...", "critiques": "...", "instruction": "...", "answer": "...", "revision": "..."}
|
||||
```
|
||||
- `pygmalion`: pygmalion
|
||||
```json
|
||||
{"conversations": [{"role": "...", "value": "..."}]}
|
||||
```
|
||||
- `metharme`: instruction, adds additional eos tokens
|
||||
```json
|
||||
{"prompt": "...", "generation": "..."}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
##### Template-Free
|
||||
|
||||
- `input_output`: template-free prompt construction
|
||||
```json
|
||||
{"segments": [{"label": true|false, "text": "..."}]}
|
||||
```
|
||||
|
||||
This is a special format that allows you to construct prompts without using templates. This is for advanced users who want more freedom with prompt construction. See [these docs](docs/input_output.qmd) for more details.
|
||||
|
||||
##### Conversation
|
||||
|
||||
- `sharegpt`: conversations where `from` is `human`/`gpt`. (optional: first row with role `system` to override default system prompt)
|
||||
```json
|
||||
{"conversations": [{"from": "...", "value": "..."}]}
|
||||
```
|
||||
|
||||
<details>
|
||||
|
||||
<summary>See other formats</summary>
|
||||
|
||||
- `pygmalion`: pygmalion
|
||||
```json
|
||||
{"conversations": [{"role": "...", "value": "..."}]}
|
||||
```
|
||||
- `sharegpt.load_role`: conversations where `role` is used instead of `from`
|
||||
```json
|
||||
{"conversations": [{"role": "...", "value": "..."}]}
|
||||
@@ -369,12 +446,14 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
||||
|
||||
</details>
|
||||
|
||||
Note: `type: sharegpt` opens a special config `conversation:` that enables conversions to many Conversation types. See dataset section under [all yaml options](#all-yaml-options).
|
||||
|
||||
#### How to add custom prompts
|
||||
|
||||
For a dataset that is preprocessed for instruction purposes:
|
||||
|
||||
```json
|
||||
{"instruction": "...", "output": "..."}
|
||||
{"input": "...", "output": "..."}
|
||||
```
|
||||
|
||||
You can use this example in your YAML config:
|
||||
@@ -385,15 +464,21 @@ datasets:
|
||||
type:
|
||||
system_prompt: ""
|
||||
field_system: system
|
||||
field_instruction: input
|
||||
field_output: output
|
||||
format: "[INST] {instruction} [/INST]"
|
||||
no_input_format: "[INST] {instruction} [/INST]"
|
||||
```
|
||||
See full config options under [all yaml options](#all-yaml-options).
|
||||
|
||||
#### How to use your custom pretokenized dataset
|
||||
|
||||
- Do not pass a `type:`
|
||||
- Columns in Dataset must be exactly `input_ids`, `attention_mask`, `labels`
|
||||
|
||||
```yaml
|
||||
- path: ...
|
||||
```
|
||||
|
||||
### Config
|
||||
|
||||
@@ -407,22 +492,18 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
||||
|
||||
- dataset
|
||||
```yaml
|
||||
sequence_len: 2048 # max token length for prompt
|
||||
|
||||
# huggingface repo
|
||||
datasets:
|
||||
# huggingface repo
|
||||
- path: vicgalle/alpaca-gpt4
|
||||
type: alpaca # format from earlier
|
||||
type: alpaca
|
||||
|
||||
# huggingface repo with specific configuration/subset
|
||||
datasets:
|
||||
# huggingface repo with specific configuration/subset
|
||||
- path: EleutherAI/pile
|
||||
name: enron_emails
|
||||
type: completion # format from earlier
|
||||
field: text # Optional[str] default: text, field to use for completion data
|
||||
|
||||
# huggingface repo with multiple named configurations/subsets
|
||||
datasets:
|
||||
# huggingface repo with multiple named configurations/subsets
|
||||
- path: bigcode/commitpackft
|
||||
name:
|
||||
- ruby
|
||||
@@ -430,39 +511,42 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
||||
- typescript
|
||||
type: ... # unimplemented custom format
|
||||
|
||||
# fastchat conversation
|
||||
# See 'conversation' options: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
datasets:
|
||||
# fastchat conversation
|
||||
# See 'conversation' options: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
- path: ...
|
||||
type: sharegpt
|
||||
conversation: chatml
|
||||
conversation: chatml # default: vicuna_v1.1
|
||||
|
||||
# local
|
||||
datasets:
|
||||
# local
|
||||
- path: data.jsonl # or json
|
||||
ds_type: json # see other options below
|
||||
type: alpaca
|
||||
|
||||
# dataset with splits, but no train split
|
||||
dataset:
|
||||
# dataset with splits, but no train split
|
||||
- path: knowrohit07/know_sql
|
||||
type: context_qa.load_v2
|
||||
train_on_split: validation
|
||||
|
||||
# loading from s3 or gcs
|
||||
# s3 creds will be loaded from the system default and gcs only supports public access
|
||||
dataset:
|
||||
# loading from s3 or gcs
|
||||
# s3 creds will be loaded from the system default and gcs only supports public access
|
||||
- path: s3://path_to_ds # Accepts folder with arrow/parquet or file path like above. Supports s3, gcs.
|
||||
...
|
||||
|
||||
# Loading Data From a Public URL
|
||||
# - The file format is `json` (which includes `jsonl`) by default. For different formats, adjust the `ds_type` option accordingly.
|
||||
- path: https://some.url.com/yourdata.jsonl # The URL should be a direct link to the file you wish to load. URLs must use HTTPS protocol, not HTTP.
|
||||
ds_type: json # this is the default, see other options below.
|
||||
```
|
||||
|
||||
- loading
|
||||
```yaml
|
||||
load_in_4bit: true
|
||||
load_in_8bit: true
|
||||
bf16: true # require >=ampere
|
||||
fp16: true
|
||||
|
||||
bf16: auto # require >=ampere, auto will detect if your GPU supports this and choose automatically.
|
||||
fp16: # leave empty to use fp16 when bf16 is 'auto'. set to false if you want to fallback to fp32
|
||||
tf32: true # require >=ampere
|
||||
|
||||
bfloat16: true # require >=ampere, use instead of bf16 when you don't want AMP (automatic mixed precision)
|
||||
float16: true # use instead of fp16 when you don't want AMP
|
||||
```
|
||||
@@ -470,7 +554,7 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
||||
|
||||
- lora
|
||||
```yaml
|
||||
adapter: lora # qlora or leave blank for full finetune
|
||||
adapter: lora # 'qlora' or leave blank for full finetune
|
||||
lora_r: 8
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
@@ -479,9 +563,9 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
||||
- v_proj
|
||||
```
|
||||
|
||||
<details>
|
||||
<details id="all-yaml-options">
|
||||
|
||||
<summary>All yaml options (click me)</summary>
|
||||
<summary>All yaml options (click to expand)</summary>
|
||||
|
||||
```yaml
|
||||
# This is the huggingface model that contains *.pt, *.safetensors, or *.bin files
|
||||
@@ -493,8 +577,8 @@ base_model_ignore_patterns:
|
||||
# You can set that here, or leave this empty to default to base_model
|
||||
base_model_config: ./llama-7b-hf
|
||||
# You can specify to choose a specific model revision from huggingface hub
|
||||
model_revision:
|
||||
# Optional tokenizer configuration override in case you want to use a different tokenizer
|
||||
revision_of_model:
|
||||
# Optional tokenizer configuration path in case you want to use a different tokenizer
|
||||
# than the one defined in the base model
|
||||
tokenizer_config:
|
||||
# If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too
|
||||
@@ -511,15 +595,16 @@ tokenizer_legacy:
|
||||
# This is reported to improve training speed on some models
|
||||
resize_token_embeddings_to_32x:
|
||||
|
||||
# (Internal use only)
|
||||
# Used to identify which the model is based on
|
||||
is_falcon_derived_model:
|
||||
is_llama_derived_model:
|
||||
is_qwen_derived_model:
|
||||
# Please note that if you set this to true, `padding_side` will be set to "left" by default
|
||||
is_mistral_derived_model:
|
||||
is_qwen_derived_model:
|
||||
|
||||
# optional overrides to the base model configuration
|
||||
model_config:
|
||||
overrides_of_model_config:
|
||||
# RoPE Scaling https://github.com/huggingface/transformers/pull/24653
|
||||
rope_scaling:
|
||||
type: # linear | dynamic
|
||||
@@ -536,8 +621,6 @@ bnb_config_kwargs:
|
||||
|
||||
# Whether you are training a 4-bit GPTQ quantized model
|
||||
gptq: true
|
||||
gptq_groupsize: 128 # group size
|
||||
gptq_model_v1: false # v1 or v2
|
||||
|
||||
# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
|
||||
load_in_8bit: true
|
||||
@@ -573,14 +656,18 @@ datasets:
|
||||
train_on_split: train # Optional[str] name of dataset split to load from
|
||||
|
||||
# Optional[str] fastchat conversation type, only used with type: sharegpt
|
||||
conversation: # Options (see Conversation 'name'): https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
conversation: # Options (see Conversation 'name'): https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py
|
||||
field_human: # Optional[str]. Human key to use for conversation.
|
||||
field_model: # Optional[str]. Assistant key to use for conversation.
|
||||
# Add additional keys from your dataset as input or output roles
|
||||
roles:
|
||||
input: # Optional[List[str]]. These will be masked based on train_on_input
|
||||
output: # Optional[List[str]].
|
||||
|
||||
# Custom user prompt
|
||||
# Custom user instruction prompt
|
||||
- path: repo
|
||||
type:
|
||||
# The below are defaults. only set what's needed.
|
||||
# The below are defaults. only set what's needed if you use a different column name.
|
||||
system_prompt: ""
|
||||
system_format: "{system}"
|
||||
field_system: system
|
||||
@@ -589,6 +676,7 @@ datasets:
|
||||
field_output: output
|
||||
|
||||
# Customizable to be single line or multi-line
|
||||
# Use {instruction}/{input} as key to be replaced
|
||||
# 'format' can include {input}
|
||||
format: |-
|
||||
User: {instruction} {input}
|
||||
@@ -599,12 +687,29 @@ datasets:
|
||||
# For `completion` datsets only, uses the provided field instead of `text` column
|
||||
field:
|
||||
|
||||
# use RL training: dpo, ipo, kto_pair
|
||||
# If false, the datasets will not be shuffled and will keep their original order in `datasets`.
|
||||
# The same applies to the `test_datasets` option and the `pretraining_dataset` option. Default is true.
|
||||
shuffle_merged_datasets: true
|
||||
|
||||
# A list of one or more datasets to eval the model with.
|
||||
# You can use either test_datasets, or val_set_size, but not both.
|
||||
test_datasets:
|
||||
- path: /workspace/data/eval.jsonl
|
||||
ds_type: json
|
||||
# You need to specify a split. For "json" datasets the default split is called "train".
|
||||
split: train
|
||||
type: completion
|
||||
data_files:
|
||||
- /workspace/data/eval.jsonl
|
||||
|
||||
# use RL training: 'dpo', 'ipo', 'kto_pair'
|
||||
rl:
|
||||
|
||||
# Saves the desired chat template to the tokenizer_config.json for easier inferencing
|
||||
# Currently supports chatml and inst (mistral/mixtral)
|
||||
chat_template: chatml
|
||||
# Changes the default system message
|
||||
default_system_message: You are a helpful assistant. Please give a long and detailed answer. # Currently only supports chatml.
|
||||
# Axolotl attempts to save the dataset as an arrow after packing the data together so
|
||||
# subsequent training attempts load faster, relative path
|
||||
dataset_prepared_path: data/last_run_prepared
|
||||
@@ -613,8 +718,11 @@ push_dataset_to_hub: # repo path
|
||||
# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()`
|
||||
# if not set.
|
||||
dataset_processes: # defaults to os.cpu_count() if not set
|
||||
# Keep dataset in memory while preprocessing
|
||||
# Only needed if cached dataset is taking too much storage
|
||||
dataset_keep_in_memory:
|
||||
# push checkpoints to hub
|
||||
hub_model_id: # repo path to push finetuned model
|
||||
hub_model_id: # private repo path to push finetuned model
|
||||
# how to push checkpoints to hub
|
||||
# https://huggingface.co/docs/transformers/v4.31.0/en/main_classes/trainer#transformers.TrainingArguments.hub_strategy
|
||||
hub_strategy:
|
||||
@@ -634,10 +742,6 @@ sequence_len: 2048
|
||||
# Pad inputs so each step uses constant sized buffers
|
||||
# This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently
|
||||
pad_to_sequence_len:
|
||||
# Max sequence length to concatenate training samples together up to
|
||||
# Inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
|
||||
# FutureWarning: This will soon be DEPRECATED
|
||||
max_packed_sequence_len: 1024
|
||||
# Use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true'
|
||||
sample_packing:
|
||||
# Set to 'false' if getting errors during eval with sample_packing on.
|
||||
@@ -674,7 +778,8 @@ lora_target_modules:
|
||||
# - gate_proj
|
||||
# - down_proj
|
||||
# - up_proj
|
||||
lora_target_linear: # If true, will target all linear layers
|
||||
lora_target_linear: # If true, will target all linear modules
|
||||
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
|
||||
|
||||
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
||||
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
||||
@@ -686,10 +791,18 @@ lora_modules_to_save:
|
||||
|
||||
lora_fan_in_fan_out: false
|
||||
|
||||
peft:
|
||||
# Configuration options for loftq initialization for LoRA
|
||||
# https://huggingface.co/docs/peft/developer_guides/quantization#loftq-initialization
|
||||
loftq_config:
|
||||
loftq_bits: # typically 4 bits
|
||||
|
||||
# ReLoRA configuration
|
||||
# Must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed
|
||||
relora_steps: # Number of steps per ReLoRA restart
|
||||
relora_warmup_steps: # Number of per-restart warmup steps
|
||||
relora_anneal_steps: # Number of anneal steps for each relora cycle
|
||||
relora_prune_ratio: # threshold for optimizer magnitude when pruning
|
||||
relora_cpu_offload: # True to perform lora weight merges on cpu during restarts, for modest gpu memory savings
|
||||
|
||||
# wandb configuration if you're using it
|
||||
@@ -705,6 +818,7 @@ wandb_log_model: # "checkpoint" to log model to wandb Artifacts every `save_step
|
||||
# mlflow configuration if you're using it
|
||||
mlflow_tracking_uri: # URI to mlflow
|
||||
mlflow_experiment_name: # Your experiment name
|
||||
hf_mlflow_log_artifacts: # set to true to copy each saved checkpoint on each save to mlflow artifact registry
|
||||
|
||||
# Where to save the full-finetuned model to
|
||||
output_dir: ./completed-model
|
||||
@@ -738,7 +852,8 @@ save_total_limit: # Checkpoints saved at a time
|
||||
max_steps:
|
||||
|
||||
eval_table_size: # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0
|
||||
eval_table_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
|
||||
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
|
||||
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", chrf]
|
||||
|
||||
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
|
||||
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
|
||||
@@ -757,7 +872,7 @@ group_by_length: false
|
||||
gradient_checkpointing: false
|
||||
# additional kwargs to pass to the trainer for gradient checkpointing
|
||||
# gradient_checkpointing_kwargs:
|
||||
# use_reentrant: false
|
||||
# use_reentrant: true
|
||||
|
||||
# Stop training after this many evaluation losses have increased in a row
|
||||
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
|
||||
@@ -767,14 +882,11 @@ early_stopping_patience: 3
|
||||
lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine
|
||||
lr_scheduler_kwargs:
|
||||
cosine_min_lr_ratio: # decay lr to some percentage of the peak lr, e.g. cosine_min_lr_ratio=0.1 for 10% of peak lr
|
||||
cosine_constant_lr_ratio: # freeze lr at some percentage of the step, e.g. cosine_constant_lr_ratio=0.8 means start cosine_min_lr at 80% of training step (https://arxiv.org/pdf/2308.04014.pdf)
|
||||
|
||||
# For one_cycle optim
|
||||
lr_div_factor: # Learning rate div factor
|
||||
|
||||
# For log_sweep optim
|
||||
log_sweep_min_lr:
|
||||
log_sweep_max_lr:
|
||||
|
||||
# Specify optimizer
|
||||
# Valid values are driven by the Transformers OptimizerNames class, see:
|
||||
# https://github.com/huggingface/transformers/blob/95b374952dc27d8511541d6f5a4e22c9ec11fb24/src/transformers/training_args.py#L134
|
||||
@@ -800,7 +912,26 @@ log_sweep_max_lr:
|
||||
# - paged_adamw_8bit
|
||||
# - paged_lion_32bit
|
||||
# - paged_lion_8bit
|
||||
# - galore_adamw
|
||||
# - galore_adamw_8bit
|
||||
# - galore_adafactor
|
||||
# - galore_adamw_layerwise
|
||||
# - galore_adamw_8bit_layerwise
|
||||
# - galore_adafactor_layerwise
|
||||
optimizer:
|
||||
# Dictionary of arguments to pass to the optimizer
|
||||
optim_args:
|
||||
# For Galore Optimizers the following optim_args are available
|
||||
# rank: # type: int
|
||||
# update_proj_gap # type: int
|
||||
# scale # type: float
|
||||
# proj_type: # type: str, default = std
|
||||
|
||||
# The target modules to optimize, i.e. the module names that you would like to train, right now this is used only for GaLore algorithm
|
||||
optim_target_modules:
|
||||
# - self_attn # for llama
|
||||
# - mlp
|
||||
|
||||
# Specify weight decay
|
||||
weight_decay:
|
||||
# adamw hyperparams
|
||||
@@ -828,7 +959,8 @@ flash_attn_fuse_mlp: # Whether to fuse part of the MLP into a single operation
|
||||
# Whether to use scaled-dot-product attention
|
||||
# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
|
||||
sdp_attention:
|
||||
|
||||
# Shifted-sparse attention (only llama) - https://arxiv.org/pdf/2309.12307.pdf
|
||||
s2_attention:
|
||||
# Resume from a specific checkpoint dir
|
||||
resume_from_checkpoint:
|
||||
# If resume_from_checkpoint isn't set and you simply want it to start where it left off.
|
||||
@@ -852,7 +984,7 @@ tokens:
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
|
||||
# Deepspeed config path. e.g., deepspeed/zero3.json
|
||||
# Deepspeed config path. e.g., deepspeed_configs/zero3.json
|
||||
deepspeed:
|
||||
|
||||
# Advanced DDP Arguments
|
||||
@@ -945,6 +1077,9 @@ Run
|
||||
accelerate launch -m axolotl.cli.train your_config.yml
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> You can also reference a config file that is hosted on a public URL, for example `accelerate launch -m axolotl.cli.train https://yourdomain.com/your_config.yml`
|
||||
|
||||
#### Preprocess dataset
|
||||
|
||||
You can optionally pre-tokenize dataset with the following before finetuning.
|
||||
@@ -973,11 +1108,11 @@ for deepspeed is available at https://huggingface.co/docs/accelerate/main/en/usa
|
||||
We provide several default deepspeed JSON configurations for ZeRO stage 1, 2, and 3.
|
||||
|
||||
```yaml
|
||||
deepspeed: deepspeed/zero1.json
|
||||
deepspeed: deepspeed_configs/zero1.json
|
||||
```
|
||||
|
||||
```shell
|
||||
accelerate launch -m axolotl.cli.train examples/llama-2/config.py --deepspeed deepspeed/zero1.json
|
||||
accelerate launch -m axolotl.cli.train examples/llama-2/config.py --deepspeed deepspeed_configs/zero1.json
|
||||
```
|
||||
|
||||
##### FSDP
|
||||
@@ -993,6 +1128,10 @@ fsdp_config:
|
||||
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
||||
```
|
||||
|
||||
##### FSDP + QLoRA
|
||||
|
||||
Axolotl supports training with FSDP and QLoRA, see [these docs](docs/fsdp_qlora.qmd) for more information.
|
||||
|
||||
##### Weights & Biases Logging
|
||||
|
||||
Make sure your `WANDB_API_KEY` environment variable is set (recommended) or you login to wandb with `wandb login`.
|
||||
@@ -1054,7 +1193,7 @@ Please use `--sample_packing False` if you have it on and receive the error simi
|
||||
|
||||
### Merge LORA to base
|
||||
|
||||
The following command will merge your LORA adapater with your base model. You can optionally pass the argument `--lora_model_dir` to specify the directory where your LORA adapter was saved, otherwhise, this will be inferred from `output_dir` in your axolotl config file. The merged model is saved in the sub-directory `{lora_model_dir}/merged`.
|
||||
The following command will merge your LORA adapater with your base model. You can optionally pass the argument `--lora_model_dir` to specify the directory where your LORA adapter was saved, otherwhise, this will be inferred from `output_dir` in your axolotl config file. The merged model is saved in the sub-directory `{lora_model_dir}/merged`.
|
||||
|
||||
```bash
|
||||
python3 -m axolotl.cli.merge_lora your_config.yml --lora_model_dir="./completed-model"
|
||||
@@ -1070,7 +1209,7 @@ although this will be very slow, and using the config options above are recommen
|
||||
|
||||
## Common Errors 🧰
|
||||
|
||||
See also the [FAQ's](./docs/faq.md) and [debugging guide](docs/debugging.md).
|
||||
See also the [FAQ's](./docs/faq.qmd) and [debugging guide](docs/debugging.qmd).
|
||||
|
||||
> If you encounter a 'Cuda out of memory' error, it means your GPU ran out of memory during the training process. Here's how to resolve it:
|
||||
|
||||
@@ -1104,7 +1243,7 @@ It's safe to ignore it.
|
||||
|
||||
> NCCL Timeouts during training
|
||||
|
||||
See the [NCCL](docs/nccl.md) guide.
|
||||
See the [NCCL](docs/nccl.qmd) guide.
|
||||
|
||||
|
||||
### Tokenization Mismatch b/w Inference & Training
|
||||
@@ -1115,18 +1254,20 @@ If you decode a prompt constructed by axolotl, you might see spaces between toke
|
||||
|
||||
1. Materialize some data using `python -m axolotl.cli.preprocess your_config.yml --debug`, and then decode the first few rows with your model's tokenizer.
|
||||
2. During inference, right before you pass a tensor of token ids to your model, decode these tokens back into a string.
|
||||
3. Make sure the inference string from #2 looks **exactly** like the data you fine tuned on from #1, including spaces and new lines. If they aren't the same adjust your inference server accordingly.
|
||||
4. As an additional troubleshooting step, you can look look at the token ids between 1 and 2 to make sure they are identical.
|
||||
3. Make sure the inference string from #2 looks **exactly** like the data you fine tuned on from #1, including spaces and new lines. If they aren't the same, adjust your inference server accordingly.
|
||||
4. As an additional troubleshooting step, you can look at the token ids between 1 and 2 to make sure they are identical.
|
||||
|
||||
Having misalignment between your prompts during training and inference can cause models to perform very poorly, so it is worth checking this. See [this blog post](https://hamel.dev/notes/llm/05_tokenizer_gotchas.html) for a concrete example.
|
||||
|
||||
## Debugging Axolotl
|
||||
|
||||
See [this debugging guide](docs/debugging.md) for tips on debugging Axolotl, along with an example configuration for debugging with VSCode.
|
||||
See [this debugging guide](docs/debugging.qmd) for tips on debugging Axolotl, along with an example configuration for debugging with VSCode.
|
||||
|
||||
## Need help? 🙋♂️
|
||||
## Need help? 🙋
|
||||
|
||||
Join our [Discord server](https://discord.gg/HhrNrHJPRb) where we can help you
|
||||
Join our [Discord server](https://discord.gg/HhrNrHJPRb) where we our community members can help you.
|
||||
|
||||
Need dedicated support? Please contact us at [✉️wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org) for dedicated support options.
|
||||
|
||||
## Badge ❤🏷️
|
||||
|
||||
@@ -1143,7 +1284,7 @@ Building something cool with Axolotl? Consider adding a badge to your model card
|
||||
Check out some of the projects and models that have been built using Axolotl! Have a model you'd like to add to our Community Showcase? Open a PR with your model.
|
||||
|
||||
Open Access AI Collective
|
||||
- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b)
|
||||
- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b-fixed)
|
||||
- [Manticore 13b](https://huggingface.co/openaccess-ai-collective/manticore-13b)
|
||||
- [Hippogriff 30b](https://huggingface.co/openaccess-ai-collective/hippogriff-30b-chat)
|
||||
|
||||
@@ -1160,13 +1301,28 @@ PRs are **greatly welcome**!
|
||||
|
||||
Please run below to setup env
|
||||
```bash
|
||||
git clone https://github.com/OpenAccess-AI-Collective/axolotl
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging
|
||||
pip3 install -e '.[flash-attn,deepspeed]'
|
||||
|
||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
||||
pre-commit install
|
||||
|
||||
# test
|
||||
pytest tests/
|
||||
|
||||
# optional: run against all files
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
|
||||
|
||||
<a href="https://github.com/openaccess-ai-collective/axolotl/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
|
||||
</a>
|
||||
|
||||
## Sponsors 🤝❤
|
||||
|
||||
OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
|
||||
@@ -1195,4 +1351,6 @@ consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsor
|
||||
|
||||
#### 🥉 Bronze Sponsors - $500/mo
|
||||
|
||||
- [JarvisLabs.ai](https://jarvislabs.ai)
|
||||
|
||||
---
|
||||
|
||||
51
_quarto.yml
Normal file
51
_quarto.yml
Normal file
@@ -0,0 +1,51 @@
|
||||
project:
|
||||
type: website
|
||||
|
||||
website:
|
||||
title: "Axolotl"
|
||||
description: "Fine-tuning"
|
||||
favicon: favicon.jpg
|
||||
navbar:
|
||||
title: Axolotl
|
||||
background: dark
|
||||
pinned: false
|
||||
collapse: false
|
||||
tools:
|
||||
- icon: twitter
|
||||
href: https://twitter.com/axolotl_ai
|
||||
- icon: github
|
||||
href: https://github.com/OpenAccess-AI-Collective/axolotl/
|
||||
- icon: discord
|
||||
href: https://discord.gg/7m9sfhzaf3
|
||||
|
||||
sidebar:
|
||||
pinned: true
|
||||
collapse-level: 2
|
||||
style: docked
|
||||
contents:
|
||||
- text: Home
|
||||
href: index.qmd
|
||||
- section: "How-To Guides"
|
||||
contents:
|
||||
# TODO Edit folder structure after we have more docs.
|
||||
- docs/debugging.qmd
|
||||
- docs/multipack.qmd
|
||||
- docs/fdsp_qlora.qmd
|
||||
- docs/input_output.qmd
|
||||
- docs/rlhf.qmd
|
||||
- docs/nccl.qmd
|
||||
- docs/mac.qmd
|
||||
- docs/multi-node.qmd
|
||||
- section: "Reference"
|
||||
contents:
|
||||
- docs/config.qmd
|
||||
- docs/faq.qmd
|
||||
|
||||
|
||||
|
||||
|
||||
format:
|
||||
html:
|
||||
theme: materia
|
||||
css: styles.css
|
||||
toc: true
|
||||
39
cicd/Dockerfile.jinja
Normal file
39
cicd/Dockerfile.jinja
Normal file
@@ -0,0 +1,39 @@
|
||||
FROM winglian/axolotl-base:{{ BASE_TAG }}
|
||||
|
||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
||||
ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
|
||||
ENV CUDA="{{ CUDA }}"
|
||||
ENV BNB_CUDA_VERSION="{{ CUDA }}"
|
||||
ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
|
||||
ENV GITHUB_REF="{{ GITHUB_REF }}"
|
||||
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
|
||||
|
||||
WORKDIR /workspace/axolotl
|
||||
|
||||
RUN git fetch origin +$GITHUB_REF && \
|
||||
git checkout FETCH_HEAD
|
||||
|
||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,galore,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
||||
else \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,galore] $AXOLOTL_ARGS; \
|
||||
fi
|
||||
|
||||
# So we can test the Docker image
|
||||
RUN pip install pytest
|
||||
|
||||
# fix so that git fetch/pull from remote works
|
||||
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
|
||||
git config --get remote.origin.fetch
|
||||
|
||||
# helper for huggingface-login cli
|
||||
RUN git config --global credential.helper store
|
||||
5
cicd/cicd.sh
Executable file
5
cicd/cicd.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
pytest --ignore=tests/e2e/ /workspace/axolotl/tests/
|
||||
pytest /workspace/axolotl/tests/e2e/patched/
|
||||
pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/
|
||||
75
cicd/tests.py
Normal file
75
cicd/tests.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""
|
||||
modal application to run axolotl gpu tests in Modal
|
||||
"""
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
|
||||
import jinja2
|
||||
import modal
|
||||
from jinja2 import select_autoescape
|
||||
from modal import Image, Stub
|
||||
|
||||
cicd_path = pathlib.Path(__file__).parent.resolve()
|
||||
|
||||
template_loader = jinja2.FileSystemLoader(searchpath=cicd_path)
|
||||
template_env = jinja2.Environment(
|
||||
loader=template_loader, autoescape=select_autoescape()
|
||||
)
|
||||
df_template = template_env.get_template("Dockerfile.jinja")
|
||||
|
||||
df_args = {
|
||||
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
||||
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.0.1"),
|
||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.10-cu118-2.0.1"),
|
||||
"CUDA": os.environ.get("CUDA", "118"),
|
||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||
}
|
||||
|
||||
dockerfile_contents = df_template.render(**df_args)
|
||||
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
with open(pathlib.Path(temp_dir) / "Dockerfile", "w", encoding="utf-8") as f:
|
||||
f.write(dockerfile_contents)
|
||||
|
||||
cicd_image = (
|
||||
Image.from_dockerfile(
|
||||
pathlib.Path(temp_dir) / "Dockerfile",
|
||||
force_build=True,
|
||||
gpu="A10G",
|
||||
)
|
||||
.env(df_args)
|
||||
.pip_install("fastapi==0.110.0", "pydantic==2.6.3")
|
||||
)
|
||||
|
||||
stub = Stub("Axolotl CI/CD", secrets=[])
|
||||
|
||||
|
||||
N_GPUS = int(os.environ.get("N_GPUS", 1))
|
||||
GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)
|
||||
|
||||
|
||||
def run_cmd(cmd: str, run_folder: str):
|
||||
import subprocess # nosec
|
||||
|
||||
# Propagate errors from subprocess.
|
||||
if exit_code := subprocess.call(cmd.split(), cwd=run_folder): # nosec
|
||||
exit(exit_code) # pylint: disable=consider-using-sys-exit
|
||||
|
||||
|
||||
@stub.function(
|
||||
image=cicd_image,
|
||||
gpu=GPU_CONFIG,
|
||||
timeout=45 * 60,
|
||||
cpu=8.0,
|
||||
memory=131072,
|
||||
)
|
||||
def cicd_pytest():
|
||||
run_cmd("./cicd/cicd.sh", "/workspace/axolotl")
|
||||
|
||||
|
||||
@stub.local_entrypoint()
|
||||
def main():
|
||||
cicd_pytest.remote()
|
||||
@@ -15,16 +15,8 @@
|
||||
"hysteresis": 2,
|
||||
"min_loss_scale": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"type": "AdamW",
|
||||
"params": {
|
||||
"lr": "auto",
|
||||
"betas": "auto",
|
||||
"eps": "auto",
|
||||
"weight_decay": "auto"
|
||||
}
|
||||
},
|
||||
"gradient_accumulation_steps": "auto",
|
||||
"gradient_clipping": "auto",
|
||||
"train_batch_size": "auto",
|
||||
"train_micro_batch_size_per_gpu": "auto",
|
||||
"wall_clock_breakdown": false
|
||||
@@ -19,16 +19,8 @@
|
||||
"hysteresis": 2,
|
||||
"min_loss_scale": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"type": "AdamW",
|
||||
"params": {
|
||||
"lr": "auto",
|
||||
"betas": "auto",
|
||||
"eps": "auto",
|
||||
"weight_decay": "auto"
|
||||
}
|
||||
},
|
||||
"gradient_accumulation_steps": "auto",
|
||||
"gradient_clipping": "auto",
|
||||
"train_batch_size": "auto",
|
||||
"train_micro_batch_size_per_gpu": "auto",
|
||||
"wall_clock_breakdown": false
|
||||
@@ -23,16 +23,8 @@
|
||||
"hysteresis": 2,
|
||||
"min_loss_scale": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"type": "AdamW",
|
||||
"params": {
|
||||
"lr": "auto",
|
||||
"betas": "auto",
|
||||
"eps": "auto",
|
||||
"weight_decay": "auto"
|
||||
}
|
||||
},
|
||||
"gradient_accumulation_steps": "auto",
|
||||
"gradient_clipping": "auto",
|
||||
"train_batch_size": "auto",
|
||||
"train_micro_batch_size_per_gpu": "auto",
|
||||
"wall_clock_breakdown": false
|
||||
@@ -23,16 +23,8 @@
|
||||
"hysteresis": 2,
|
||||
"min_loss_scale": 1
|
||||
},
|
||||
"optimizer": {
|
||||
"type": "AdamW",
|
||||
"params": {
|
||||
"lr": "auto",
|
||||
"betas": "auto",
|
||||
"eps": "auto",
|
||||
"weight_decay": "auto"
|
||||
}
|
||||
},
|
||||
"gradient_accumulation_steps": "auto",
|
||||
"gradient_clipping": "auto",
|
||||
"train_batch_size": "auto",
|
||||
"train_micro_batch_size_per_gpu": "auto",
|
||||
"wall_clock_breakdown": false
|
||||
@@ -1 +1 @@
|
||||
This directory contains example config files that might be useful for debugging. Please see [docs/debugging.md](../docs/debugging.md) for more information.
|
||||
This directory contains example config files that might be useful for debugging. Please see [docs/debugging.qmd](../docs/debugging.qmd) for more information.
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -3,9 +3,10 @@ FROM winglian/axolotl-base:$BASE_TAG
|
||||
|
||||
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||
ARG AXOLOTL_EXTRAS=""
|
||||
ARG AXOLOTL_ARGS=""
|
||||
ARG CUDA="118"
|
||||
ENV BNB_CUDA_VERSION=$CUDA
|
||||
ARG PYTORCH_VERSION="2.0.1"
|
||||
ARG PYTORCH_VERSION="2.1.2"
|
||||
|
||||
ENV PYTORCH_VERSION=$PYTORCH_VERSION
|
||||
|
||||
@@ -20,9 +21,9 @@ WORKDIR /workspace/axolotl
|
||||
|
||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,galore,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
||||
else \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,galore] $AXOLOTL_ARGS; \
|
||||
fi
|
||||
|
||||
# So we can test the Docker image
|
||||
|
||||
@@ -7,8 +7,8 @@ FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION a
|
||||
|
||||
ENV PATH="/root/miniconda3/bin:${PATH}"
|
||||
|
||||
ARG PYTHON_VERSION="3.9"
|
||||
ARG PYTORCH_VERSION="2.0.1"
|
||||
ARG PYTHON_VERSION="3.10"
|
||||
ARG PYTORCH_VERSION="2.1.2"
|
||||
ARG CUDA="118"
|
||||
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
|
||||
|
||||
@@ -29,7 +29,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
|
||||
WORKDIR /workspace
|
||||
|
||||
RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
|
||||
python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} deepspeed-kernels --extra-index-url https://download.pytorch.org/whl/cu$CUDA
|
||||
python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA
|
||||
|
||||
RUN git lfs install --skip-repo && \
|
||||
pip3 install awscli && \
|
||||
|
||||
@@ -7,14 +7,21 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
|
||||
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
||||
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
||||
|
||||
COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
|
||||
EXPOSE 8888
|
||||
EXPOSE 22
|
||||
|
||||
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
||||
COPY scripts/motd /etc/motd
|
||||
|
||||
RUN pip install jupyterlab notebook ipywidgets && \
|
||||
jupyter lab clean
|
||||
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
||||
mkdir -p ~/.ssh && \
|
||||
chmod 700 ~/.ssh && \
|
||||
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
||||
chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
|
||||
chmod +x /root/runpod-entrypoint.sh
|
||||
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
|
||||
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
|
||||
chmod +x /root/cloud-entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/root/runpod-entrypoint.sh"]
|
||||
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
|
||||
CMD ["sleep", "infinity"]
|
||||
@@ -3,9 +3,10 @@ FROM winglian/axolotl-base:$BASE_TAG
|
||||
|
||||
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||
ARG AXOLOTL_EXTRAS=""
|
||||
ARG AXOLOTL_ARGS=""
|
||||
ARG CUDA="118"
|
||||
ENV BNB_CUDA_VERSION=$CUDA
|
||||
ARG PYTORCH_VERSION="2.0.1"
|
||||
ARG PYTORCH_VERSION="2.1.2"
|
||||
ARG GITHUB_REF="main"
|
||||
|
||||
ENV PYTORCH_VERSION=$PYTORCH_VERSION
|
||||
@@ -24,9 +25,9 @@ RUN git fetch origin +$GITHUB_REF && \
|
||||
|
||||
# If AXOLOTL_EXTRAS is set, append it in brackets
|
||||
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
|
||||
else \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
|
||||
pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
|
||||
fi
|
||||
|
||||
# So we can test the Docker image
|
||||
|
||||
2
docs/.gitignore
vendored
Normal file
2
docs/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/.quarto/
|
||||
_site/
|
||||
17
docs/config.qmd
Normal file
17
docs/config.qmd
Normal file
@@ -0,0 +1,17 @@
|
||||
---
|
||||
title: Config options
|
||||
description: A complete list of all configuration options.
|
||||
---
|
||||
|
||||
```{python}
|
||||
#|echo: false
|
||||
#|output: asis
|
||||
import re
|
||||
# Regex pattern to match the YAML block including its code fence
|
||||
pattern = r'<details[^>]*id="all-yaml-options"[^>]*>.*?<summary>All yaml options.*?```yaml(.*?)```.*?</details>'
|
||||
|
||||
with open('../README.md', 'r') as f:
|
||||
doc = f.read()
|
||||
match = re.search(pattern, doc, re.DOTALL)
|
||||
print("```yaml", match.group(1).strip(), "```", sep="\n")
|
||||
```
|
||||
@@ -1,4 +1,8 @@
|
||||
# Debugging Axolotl
|
||||
---
|
||||
title: Debugging
|
||||
description: How to debug Axolotl
|
||||
---
|
||||
|
||||
|
||||
This document provides some tips and tricks for debugging Axolotl. It also provides an example configuration for debugging with VSCode. A good debugging setup is essential to understanding how Axolotl code works behind the scenes.
|
||||
|
||||
@@ -74,7 +78,6 @@ pip3 install -e '.[flash-attn,deepspeed]'
|
||||
|
||||
If you developing on a remote host, you can easily use VSCode to debug remotely. To do so, you will need to follow this [remote - SSH guide](https://code.visualstudio.com/docs/remote/ssh). You can also see the video below on [Docker and Remote SSH debugging](#video---attaching-to-docker-on-remote-host).
|
||||
|
||||
```bash
|
||||
|
||||
### Configuration
|
||||
|
||||
18
docs/faq.md
18
docs/faq.md
@@ -1,18 +0,0 @@
|
||||
# Axolotl FAQ's
|
||||
|
||||
|
||||
> The trainer stopped and hasn't progressed in several minutes.
|
||||
|
||||
Usually an issue with the GPU's communicating with each other. See the [NCCL doc](../docs/nccl.md)
|
||||
|
||||
> Exitcode -9
|
||||
|
||||
This usually happens when you run out of system RAM.
|
||||
|
||||
> Exitcode -7 while using deepspeed
|
||||
|
||||
Try upgrading deepspeed w: `pip install -U deepspeed`
|
||||
|
||||
> AttributeError: 'DummyOptim' object has no attribute 'step'
|
||||
|
||||
You may be using deepspeed with single gpu. Please don't set `deepspeed:` in yaml or cli.
|
||||
21
docs/faq.qmd
Normal file
21
docs/faq.qmd
Normal file
@@ -0,0 +1,21 @@
|
||||
---
|
||||
title: FAQ
|
||||
description: Frequently asked questions
|
||||
---
|
||||
|
||||
|
||||
**Q: The trainer stopped and hasn't progressed in several minutes.**
|
||||
|
||||
> A: Usually an issue with the GPUs communicating with each other. See the [NCCL doc](nccl.qmd)
|
||||
|
||||
**Q: Exitcode -9**
|
||||
|
||||
> A: This usually happens when you run out of system RAM.
|
||||
|
||||
**Q: Exitcode -7 while using deepspeed**
|
||||
|
||||
> A: Try upgrading deepspeed w: `pip install -U deepspeed`
|
||||
|
||||
**Q: AttributeError: 'DummyOptim' object has no attribute 'step'**
|
||||
|
||||
> A: You may be using deepspeed with single gpu. Please don't set `deepspeed:` in yaml or cli.
|
||||
43
docs/fsdp_qlora.qmd
Normal file
43
docs/fsdp_qlora.qmd
Normal file
@@ -0,0 +1,43 @@
|
||||
---
|
||||
title: FDSP + QLoRA
|
||||
description: Use FSDP with QLoRA to fine-tune large LLMs on consumer GPUs.
|
||||
format:
|
||||
html:
|
||||
toc: true
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
Using FSDP with QLoRA is essential for **fine-tuning larger (70b+ parameter) LLMs on consumer GPUs.** For example, you can use FSDP + QLoRA to train a 70b model on two 24GB GPUs[^1].
|
||||
|
||||
Below, we describe how to use this feature in Axolotl.
|
||||
|
||||
## Usage
|
||||
|
||||
To enable `QLoRA` with `FSDP`, you need to perform the following steps:
|
||||
|
||||
> ![Tip]
|
||||
> See the [example config](#example-config) file in addition to reading these instructions.
|
||||
|
||||
1. Set `adapter: qlora` in your axolotl config file.
|
||||
2. Enable FSDP in your axolotl config, as [described here](https://github.com/OpenAccess-AI-Collective/axolotl?tab=readme-ov-file#fsdp).
|
||||
3. Use one of the supported model types: `llama`, `mistral` or `mixtral`.
|
||||
|
||||
## Example Config
|
||||
|
||||
[examples/llama-2/qlora-fsdp.yml](../examples/llama-2/qlora-fsdp.yml) contains an example of how to enable QLoRA + FSDP in axolotl.
|
||||
|
||||
## References
|
||||
|
||||
- [PR #1378](https://github.com/OpenAccess-AI-Collective/axolotl/pull/1378) enabling QLoRA in FSDP in Axolotl.
|
||||
- [Blog Post](https://www.answer.ai/posts/2024-03-06-fsdp-qlora.html) from the [Answer.AI](https://www.answer.ai/) team describing the work that enabled QLoRA in FSDP.
|
||||
- Related HuggingFace PRs Enabling FDSP + QLoRA:
|
||||
- Accelerate [PR#2544](https://github.com/huggingface/accelerate/pull/2544 )
|
||||
- Transformers [PR#29587](https://github.com/huggingface/transformers/pull/29587)
|
||||
- TRL [PR#1416](https://github.com/huggingface/trl/pull/1416)
|
||||
- PEFT [PR#1550](https://github.com/huggingface/peft/pull/1550)
|
||||
|
||||
|
||||
|
||||
|
||||
[^1]: This was enabled by [this work](https://www.answer.ai/posts/2024-03-06-fsdp-qlora.html) from the Answer.AI team.
|
||||
BIN
docs/images/4d-mask.png
Normal file
BIN
docs/images/4d-mask.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 239 KiB |
263
docs/input_output.qmd
Normal file
263
docs/input_output.qmd
Normal file
@@ -0,0 +1,263 @@
|
||||
---
|
||||
title: Template-free prompt construction
|
||||
description: "Template-free prompt construction with the `input_output` format"
|
||||
---
|
||||
|
||||
<!-- TOC -->
|
||||
|
||||
- [Background](#background)
|
||||
- [Masking Inputs](#masking-inputs)
|
||||
- [You may not want prompt templates](#you-may-not-want-prompt-templates)
|
||||
- [The `input_output` format](#the-input_output-format)
|
||||
- [Usage](#usage)
|
||||
- [1. Prepare Data](#1-prepare-data)
|
||||
- [2. Use `type: input_output`](#2-use-type-input_output)
|
||||
- [3. Check the prompts](#3-check-the-prompts)
|
||||
|
||||
<!-- /TOC -->
|
||||
|
||||
<a id="markdown-background" name="background"></a>
|
||||
|
||||
## Background
|
||||
|
||||
<a id="markdown-masking-inputs" name="masking-inputs"></a>
|
||||
|
||||
### Masking Inputs
|
||||
|
||||
One of the most popular features of
|
||||
[axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) is
|
||||
setting the following configuration value:
|
||||
|
||||
|
||||
```yaml
|
||||
train_on_inputs: false
|
||||
```
|
||||
|
||||
If you declare a [dataset formats](https://github.com/OpenAccess-AI-Collective/axolotl?tab=readme-ov-file#dataset)
|
||||
such as `alpaca` or `chatml`, axolotl knows what is an input
|
||||
(i.e. human) vs. an output (i.e. the assistant) and masks the input
|
||||
labels so that your model can focus on predicting the outputs only.
|
||||
|
||||
<a id="markdown-you-may-not-want-prompt-templates" name="you-may-not-want-prompt-templates"></a>
|
||||
|
||||
### You may not want prompt templates
|
||||
|
||||
However, there are many situations where you don't want to use one of
|
||||
these formats or templates (I usually don't!). This is because they can:
|
||||
|
||||
- Add unnecessary boilerplate to your prompts.
|
||||
- Create artifacts like special delimiters `<|im_start|>` that can
|
||||
quickly become footguns if you don't include them correctly at
|
||||
inference time.
|
||||
- Enforce a *chat* interface when you do not want one. Sometimes you
|
||||
just want to fine-tune a model to a very specific task and do NOT
|
||||
want multi-turn conversations, roles, etc.
|
||||
- Limit you to only certain roles that the template allows.
|
||||
|
||||
<a id="markdown-the-inputoutput-format" name="the-inputoutput-format"></a>
|
||||
|
||||
### The `input_output` format
|
||||
|
||||
You can construct your prompts without a template by using the
|
||||
`input_output` format, by setting `type: input_output` in your
|
||||
configuration file like this:
|
||||
|
||||
**config.yml**
|
||||
|
||||
```yaml
|
||||
train_on_inputs: false # Mask segments of your data
|
||||
datasets:
|
||||
- path: output.jsonl
|
||||
type: input_output # use template free prompt construction
|
||||
```
|
||||
|
||||
Unlike `type: completion`, which is also template-free,
|
||||
`type: input_output` allows you to mask segments of your text. More
|
||||
details on how this works are described below.
|
||||
|
||||
<a id="markdown-usage" name="usage"></a>
|
||||
|
||||
## Usage
|
||||
|
||||
This is how you can use the `input_output` format:
|
||||
|
||||
<a id="markdown-1-prepare-data" name="1-prepare-data"></a>
|
||||
|
||||
### 1. Prepare Data
|
||||
|
||||
To use the `input_output` format, collect your data in the following
|
||||
format into a jsonl file (below is the first row from the file
|
||||
`output`.jsonl` pretty printed):
|
||||
|
||||
```bash
|
||||
$ head -n1 output.jsonl | python -m json.tool
|
||||
|
||||
{.cell-output .cell-output-stdout}
|
||||
{
|
||||
"segments": [
|
||||
{
|
||||
"label": true,
|
||||
"text": "<s>Hello\n"
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "hi there!. "
|
||||
},
|
||||
{
|
||||
"label": false,
|
||||
"text": "goodbye "
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "farewell</s>"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Set `label:false` when you want to mask a segment of text so that the
|
||||
model isn't trained on it. Some things to keep in mind:
|
||||
|
||||
> [!IMPORTANT]
|
||||
> 1. **EOS, BOS, spaces, newlines etc. are entirely up to you. Axolotl
|
||||
concatenates all the segments as-is.** The tokenizer doesn't add
|
||||
anything additional. Notice how I added spaces, newlines, `<s>`
|
||||
(BOS), and `</s>` (EOS) myself.
|
||||
> 2. Make sure you check the materialized output to validate that the
|
||||
prompt is getting assembled how you like.
|
||||
|
||||
<a id="markdown-2-use-type-inputoutput" name="2-use-type-inputoutput"></a>
|
||||
|
||||
### 2. Use `type: input_output`
|
||||
|
||||
Let's materialize data with our `output.jsonl` file by setting
|
||||
`type: input_output` in our axolotl config:
|
||||
|
||||
```yaml
|
||||
# training_config.yaml
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
data_seed: 49
|
||||
seed: 49
|
||||
|
||||
datasets:
|
||||
- path: output.jsonl
|
||||
type: input_output
|
||||
val_set_size: 0.1
|
||||
|
||||
sequence_len: 896
|
||||
sample_packing: false
|
||||
|
||||
micro_batch_size: 2
|
||||
gradient_accumulation_steps: 3
|
||||
eval_batch_size: 2
|
||||
num_epochs: 1
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
```
|
||||
|
||||
You can use the following command to materialize your data. The
|
||||
`--debug` flag will print the tokens, along with the labels so you can
|
||||
verify that the correct items are being ignored:
|
||||
|
||||
```bash
|
||||
$ python -m axolotl.cli.preprocess training_config.yaml --debug
|
||||
|
||||
...
|
||||
[2024-03-05 23:36:46,969] [INFO] [axolotl.check_example_labels:35] [PID:607731] [RANK:0] <s>(1, 1) Hello(22557, 22557)
|
||||
(13, 13) hi(12014, 12014) there(736, 736) !(28808, 28808) .(28723, 28723) (28705, 28705) good(-100, 1179) bye(-100, 17664) (-100, 28705) fare(19111, 19111) well(5458, 5458) </s>(2, 2)
|
||||
|
||||
```
|
||||
|
||||
The format is `decoded_token`(`label`, `token_id`), for example,
|
||||
`<s>(1, 1)` means that the token is `<s>`, the label is `1` and the
|
||||
token_id is `1`. When the label is `-100` then that token is ignored for
|
||||
training.
|
||||
|
||||
<a id="markdown-3-check-the-prompts" name="3-check-the-prompts"></a>
|
||||
|
||||
### 3. Check the prompts
|
||||
|
||||
Here is another way to check the materialized output:
|
||||
|
||||
```python
|
||||
from transformers import AutoTokenizer
|
||||
from datasets import load_from_disk
|
||||
import yaml
|
||||
|
||||
directory = !ls last_run_prepared/
|
||||
with open('training_config.yaml', 'r') as f:
|
||||
cfg = yaml.safe_load(f)
|
||||
model_id = cfg['base_model']
|
||||
tok = AutoTokenizer.from_pretrained(model_id)
|
||||
ds = load_from_disk(f'last_run_prepared/{directory[0]}/')
|
||||
```
|
||||
|
||||
```python
|
||||
>>> row = ds[0]
|
||||
>>> print(tok.decode(row['input_ids']))
|
||||
<s> Hello
|
||||
hi there!. goodbye farewell</s>
|
||||
```
|
||||
|
||||
We can check that the right tokens are ingored by comparing the labels
|
||||
to each token:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
pd.DataFrame([{'token': tok.decode(i), 'label': l, 'id':i} for i,l in
|
||||
zip(row['input_ids'], row['labels'])])
|
||||
```
|
||||
|
||||
| token | label | id |
|
||||
|-------|-------|-------|
|
||||
| 0 | \<s\> | 1 |
|
||||
| 1 | Hello | 22557 |
|
||||
| 2 | \\n | 13 |
|
||||
| 3 | hi | 12014 |
|
||||
| 4 | there | 736 |
|
||||
| 5 | ! | 28808 |
|
||||
| 6 | . | 28723 |
|
||||
| 7 | | 28705 |
|
||||
| 8 | good | -100 |
|
||||
| 9 | bye | -100 |
|
||||
| 10 | | -100 |
|
||||
| 11 | fare | 19111 |
|
||||
| 12 | well | 5458 |
|
||||
| 13 | \</s\>| 2 |
|
||||
|
||||
|
||||
|
||||
If we look at the input data, the above table seems correct! (The jsonl
|
||||
version is repeated below for reference):
|
||||
|
||||
|
||||
```bash
|
||||
$ head -n1 output.jsonl | python -m json.tool
|
||||
|
||||
{.cell-output .cell-output-stdout}
|
||||
{
|
||||
"segments": [
|
||||
{
|
||||
"label": true,
|
||||
"text": "<s>Hello\n"
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "hi there!. "
|
||||
},
|
||||
{
|
||||
"label": false,
|
||||
"text": "goodbye "
|
||||
},
|
||||
{
|
||||
"label": true,
|
||||
"text": "farewell</s>"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
22
docs/mac.qmd
Normal file
22
docs/mac.qmd
Normal file
@@ -0,0 +1,22 @@
|
||||
---
|
||||
title: Mac M-series
|
||||
description: Mac M-series support
|
||||
---
|
||||
|
||||
Currently Axolotl on Mac is partially usable, many of the dependencies of Axolotl including Pytorch do not support MPS or have incomplete support.
|
||||
|
||||
Current support:
|
||||
|
||||
- [x] Support for all models
|
||||
- [x] Full training of models
|
||||
- [x] LoRA training
|
||||
- [x] Sample packing
|
||||
- [ ] FP16 and BF16 (awaiting AMP support for MPS in Pytorch)
|
||||
- [ ] Tri-dao's flash-attn (until it is supported use spd_attention as an alternative)
|
||||
- [ ] xformers
|
||||
- [ ] bitsandbytes (meaning no 4/8 bits loading and bnb optimizers)
|
||||
- [ ] qlora
|
||||
- [ ] DeepSpeed
|
||||
|
||||
Untested:
|
||||
- FSDP
|
||||
@@ -1,4 +1,7 @@
|
||||
# Multi Node
|
||||
---
|
||||
title: Multi Node
|
||||
description: How to use Axolotl on multiple machines
|
||||
---
|
||||
|
||||
You will need to create a configuration for accelerate, either by using `accelerate config` and follow the instructions or you can use one of the preset below:
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
# Multipack
|
||||
|
||||
4k context, bsz =4,
|
||||
each character represents 256 tokens
|
||||
X represents a padding token
|
||||
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A ]
|
||||
B B B B B B ]
|
||||
C C C C C C C ]
|
||||
D D D D ]]
|
||||
|
||||
[[ E E E E E E E E ]
|
||||
[ F F F F ]
|
||||
[ G G G ]
|
||||
[ H H H H ]]
|
||||
|
||||
[[ I I I ]
|
||||
[ J J J ]
|
||||
[ K K K K K]
|
||||
[ L L L ]]
|
||||
```
|
||||
|
||||
after padding to longest input in each step
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A ]
|
||||
B B B B B B X X X X X X ]
|
||||
C C C C C C C X X X X ]
|
||||
D D D D X X X X X X X ]]
|
||||
|
||||
[[ E E E E E E E E ]
|
||||
[ F F F F X X X X ]
|
||||
[ G G G X X X X X ]
|
||||
[ H H H H X X X X ]]
|
||||
|
||||
[[ I I I X X ]
|
||||
[ J J J X X ]
|
||||
[ K K K K K ]
|
||||
[ L L L X X ]]
|
||||
```
|
||||
|
||||
w packing ( note it's the same effective number of tokens per step, but a true bsz of 1)
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A B B B B B
|
||||
B C C C C C C C D D D D E E E E
|
||||
E E E E F F F F F G G G H H H H
|
||||
I I I J J J J K K K K K L L L X ]]
|
||||
```
|
||||
76
docs/multipack.qmd
Normal file
76
docs/multipack.qmd
Normal file
@@ -0,0 +1,76 @@
|
||||
---
|
||||
title: Multipack (Sample Packing)
|
||||
description: Multipack is a technique to pack multiple sequences into a single batch to increase training throughput.
|
||||
---
|
||||
|
||||
## Visualization of Multipack with Flash Attention
|
||||
|
||||
Because Flash Attention simply drops the attention mask, we do not need to
|
||||
construct a 4d attention mask. We only need to concatenate the sequences into
|
||||
a single batch and let flash attention know where each new sequence begins.
|
||||
|
||||
|
||||
4k context, bsz =4,
|
||||
each character represents 256 tokens
|
||||
X represents a padding token
|
||||
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A ]
|
||||
B B B B B B ]
|
||||
C C C C C C C ]
|
||||
D D D D ]]
|
||||
|
||||
[[ E E E E E E E E ]
|
||||
[ F F F F ]
|
||||
[ G G G ]
|
||||
[ H H H H ]]
|
||||
|
||||
[[ I I I ]
|
||||
[ J J J ]
|
||||
[ K K K K K]
|
||||
[ L L L ]]
|
||||
```
|
||||
|
||||
after padding to longest input in each step
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A ]
|
||||
B B B B B B X X X X X X ]
|
||||
C C C C C C C X X X X ]
|
||||
D D D D X X X X X X X ]]
|
||||
|
||||
[[ E E E E E E E E ]
|
||||
[ F F F F X X X X ]
|
||||
[ G G G X X X X X ]
|
||||
[ H H H H X X X X ]]
|
||||
|
||||
[[ I I I X X ]
|
||||
[ J J J X X ]
|
||||
[ K K K K K ]
|
||||
[ L L L X X ]]
|
||||
```
|
||||
|
||||
w packing ( note it's the same effective number of tokens per step, but a true bsz of 1)
|
||||
```
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
|
||||
[[ A A A A A A A A A A A B B B B B
|
||||
B C C C C C C C D D D D E E E E
|
||||
E E E E F F F F F G G G H H H H
|
||||
I I I J J J J K K K K K L L L X ]]
|
||||
```
|
||||
|
||||
cu_seqlens:
|
||||
[[ 0, 11, 17, 24, 28, 36, 41 44, 48, 51, 55, 60, 64]]
|
||||
|
||||
|
||||
## Multipack without Flash Attention
|
||||
|
||||
Multipack can still be achieved without Flash attention, but with lower packing
|
||||
efficiency as we are not able to join multiple batches into a single batch due to
|
||||
context length limits without flash attention. We can use either Pytorch's Scaled
|
||||
Dot Product Attention implementation or native Pytorch attention implementation
|
||||
along with [4d attention masks](https://github.com/huggingface/transformers/pull/27539)
|
||||
to pack sequences together and avoid cross attention.
|
||||
|
||||
<img src="./images/4d-mask.png" alt="axolotl" width="800">
|
||||
@@ -1,4 +1,7 @@
|
||||
# NCCL
|
||||
---
|
||||
title: NCCL
|
||||
description: Troubleshooting NCCL issues
|
||||
---
|
||||
|
||||
NVIDIA NCCL is a library to facilitate and optimize multi-GPU communication operations, such as broadcast, all-gather, reduce, all-reduce, etc. Broadly, NCCL configuration is highly environment-specific and is configured via several [environment variables](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html). A common NCCL-related problem occurs when a long-running operation times out causing the training process to abort:
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
# RLHF (Beta)
|
||||
---
|
||||
title: "RLHF (Beta)"
|
||||
description: "Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback."
|
||||
---
|
||||
|
||||
### Overview
|
||||
|
||||
@@ -12,21 +15,21 @@ feedback. Various methods include, but not limited to:
|
||||
|
||||
### RLHF using Axolotl
|
||||
|
||||
[!IMPORTANT]
|
||||
This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.
|
||||
>[!IMPORTANT]
|
||||
>This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.
|
||||
|
||||
The various RL training methods are implemented in trl and wrapped via axolotl. Below are various examples with how you can use various preference datasets to train models that use ChatML
|
||||
|
||||
#### DPO
|
||||
```yaml
|
||||
rl: true
|
||||
rl: dpo
|
||||
datasets:
|
||||
- path: Intel/orca_dpo_pairs
|
||||
split: train
|
||||
type: intel_apply_chatml
|
||||
type: chatml.intel
|
||||
- path: argilla/ultrafeedback-binarized-preferences
|
||||
split: train
|
||||
type: argilla_apply_chatml
|
||||
type: chatml.argilla
|
||||
```
|
||||
|
||||
#### IPO
|
||||
@@ -34,6 +37,31 @@ datasets:
|
||||
rl: ipo
|
||||
```
|
||||
|
||||
#### ORPO
|
||||
|
||||
Paper: https://arxiv.org/abs/2403.07691
|
||||
|
||||
```yaml
|
||||
rl: orpo
|
||||
orpo_alpha: 0.1
|
||||
remove_unused_columns: false
|
||||
|
||||
chat_template: chatml
|
||||
datasets:
|
||||
- path: argilla/ultrafeedback-binarized-preferences-cleaned
|
||||
type: orpo.chat_template
|
||||
```
|
||||
|
||||
#### Using local dataset files
|
||||
```yaml
|
||||
datasets:
|
||||
- ds_type: json
|
||||
data_files:
|
||||
- orca_rlhf.jsonl
|
||||
split: train
|
||||
type: chatml.intel
|
||||
```
|
||||
|
||||
#### Trl autounwrap for peft
|
||||
|
||||
Trl supports autounwrapping peft models, so that a ref model does not need to be additionally loaded, leading to less VRAM needed. This is on by default. To turn it off, pass the following config.
|
||||
@@ -53,8 +53,8 @@ lr_quadratic_warmup: true
|
||||
learning_rate: 0.000085
|
||||
train_on_inputs: true
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing: false
|
||||
|
||||
@@ -11,7 +11,6 @@ val_set_size: 0.05
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
sequence_len: 2048
|
||||
max_packed_sequence_len: 2048
|
||||
lora_r: 16
|
||||
lora_alpha: 32
|
||||
lora_dropout: 0.05
|
||||
@@ -36,8 +35,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-13b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
@@ -41,8 +40,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -52,6 +51,7 @@ local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
s2_attention:
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-13b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-34b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
@@ -41,8 +40,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -52,6 +51,7 @@ local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
s2_attention:
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-34b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
@@ -41,8 +40,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -52,6 +51,7 @@ local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
s2_attention:
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: codellama/CodeLlama-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
216
examples/colab-notebooks/colab-axolotl-example.ipynb
Normal file
216
examples/colab-notebooks/colab-axolotl-example.ipynb
Normal file
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "AKjdG7tbTb-n"
|
||||
},
|
||||
"source": [
|
||||
"# Example notebook for running Axolotl on google colab"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "RcbNpOgWRcii"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"# Check so there is a gpu available, a T4(free tier) is enough to run this notebook\n",
|
||||
"assert (torch.cuda.is_available()==True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "h3nLav8oTRA5"
|
||||
},
|
||||
"source": [
|
||||
"## Install Axolotl and dependencies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "3c3yGAwnOIdi",
|
||||
"outputId": "e3777b5a-40ef-424f-e181-62dfecd1dd01"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install torch==\"2.1.2\"\n",
|
||||
"!pip install -e git+https://github.com/OpenAccess-AI-Collective/axolotl#egg=axolotl\n",
|
||||
"!pip install flash-attn==\"2.5.0\"\n",
|
||||
"!pip install deepspeed==\"0.13.1\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "BW2MFr7HTjub"
|
||||
},
|
||||
"source": [
|
||||
"## Create an yaml config file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "9pkF2dSoQEUN"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import yaml\n",
|
||||
"\n",
|
||||
"# Your YAML string\n",
|
||||
"yaml_string = \"\"\"\n",
|
||||
"base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\n",
|
||||
"model_type: LlamaForCausalLM\n",
|
||||
"tokenizer_type: LlamaTokenizer\n",
|
||||
"is_llama_derived_model: true\n",
|
||||
"\n",
|
||||
"load_in_8bit: false\n",
|
||||
"load_in_4bit: true\n",
|
||||
"strict: false\n",
|
||||
"\n",
|
||||
"datasets:\n",
|
||||
" - path: mhenrichsen/alpaca_2k_test\n",
|
||||
" type: alpaca\n",
|
||||
"dataset_prepared_path:\n",
|
||||
"val_set_size: 0.05\n",
|
||||
"output_dir: ./qlora-out\n",
|
||||
"\n",
|
||||
"adapter: qlora\n",
|
||||
"lora_model_dir:\n",
|
||||
"\n",
|
||||
"sequence_len: 1096\n",
|
||||
"sample_packing: true\n",
|
||||
"pad_to_sequence_len: true\n",
|
||||
"\n",
|
||||
"lora_r: 32\n",
|
||||
"lora_alpha: 16\n",
|
||||
"lora_dropout: 0.05\n",
|
||||
"lora_target_modules:\n",
|
||||
"lora_target_linear: true\n",
|
||||
"lora_fan_in_fan_out:\n",
|
||||
"\n",
|
||||
"wandb_project:\n",
|
||||
"wandb_entity:\n",
|
||||
"wandb_watch:\n",
|
||||
"wandb_name:\n",
|
||||
"wandb_log_model:\n",
|
||||
"\n",
|
||||
"mlflow_experiment_name: colab-example\n",
|
||||
"\n",
|
||||
"gradient_accumulation_steps: 1\n",
|
||||
"micro_batch_size: 1\n",
|
||||
"num_epochs: 4\n",
|
||||
"max_steps: 20\n",
|
||||
"optimizer: paged_adamw_32bit\n",
|
||||
"lr_scheduler: cosine\n",
|
||||
"learning_rate: 0.0002\n",
|
||||
"\n",
|
||||
"train_on_inputs: false\n",
|
||||
"group_by_length: false\n",
|
||||
"bf16: false\n",
|
||||
"fp16: true\n",
|
||||
"tf32: false\n",
|
||||
"\n",
|
||||
"gradient_checkpointing: true\n",
|
||||
"early_stopping_patience:\n",
|
||||
"resume_from_checkpoint:\n",
|
||||
"local_rank:\n",
|
||||
"logging_steps: 1\n",
|
||||
"xformers_attention:\n",
|
||||
"flash_attention: false\n",
|
||||
"\n",
|
||||
"warmup_steps: 10\n",
|
||||
"evals_per_epoch:\n",
|
||||
"saves_per_epoch:\n",
|
||||
"debug:\n",
|
||||
"deepspeed:\n",
|
||||
"weight_decay: 0.0\n",
|
||||
"fsdp:\n",
|
||||
"fsdp_config:\n",
|
||||
"special_tokens:\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Convert the YAML string to a Python dictionary\n",
|
||||
"yaml_dict = yaml.safe_load(yaml_string)\n",
|
||||
"\n",
|
||||
"# Specify your file path\n",
|
||||
"file_path = 'test_axolotl.yaml'\n",
|
||||
"\n",
|
||||
"# Write the YAML file\n",
|
||||
"with open(file_path, 'w') as file:\n",
|
||||
" yaml.dump(yaml_dict, file)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "bidoj8YLTusD"
|
||||
},
|
||||
"source": [
|
||||
"## Launch the training"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "ydTI2Jk2RStU",
|
||||
"outputId": "d6d0df17-4b53-439c-c802-22c0456d301b"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Buy using the ! the comand will be executed as a bash command\n",
|
||||
"!accelerate launch -m axolotl.cli.train /content/test_axolotl.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Play with inference"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Buy using the ! the comand will be executed as a bash command\n",
|
||||
"!accelerate launch -m axolotl.cli.inference /content/test_axolotl.yaml \\\n",
|
||||
" --qlora_model_dir=\"./qlora-out\" --gradio"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"gpuType": "T4",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -2,7 +2,7 @@ base_model: tiiuae/falcon-7b
|
||||
trust_remote_code: true
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
is_falcon_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
gptq: false
|
||||
@@ -38,8 +38,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.00003
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
@@ -60,5 +60,5 @@ fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
pad_token: "<|endoftext|>"
|
||||
bos_token: ">>ABSTRACT<<"
|
||||
bos_token: "<|endoftext|>"
|
||||
eos_token: "<|endoftext|>"
|
||||
|
||||
@@ -5,7 +5,7 @@ base_model: tiiuae/falcon-7b
|
||||
trust_remote_code: true
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
is_falcon_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
# enable 4bit for QLoRA
|
||||
load_in_4bit: true
|
||||
@@ -64,8 +64,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
gradient_checkpointing: true
|
||||
# stop training after this many evaluation losses have increased in a row
|
||||
@@ -89,5 +89,5 @@ fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
pad_token: "<|endoftext|>"
|
||||
bos_token: ">>ABSTRACT<<"
|
||||
bos_token: "<|endoftext|>"
|
||||
eos_token: "<|endoftext|>"
|
||||
|
||||
@@ -2,7 +2,7 @@ base_model: tiiuae/falcon-7b
|
||||
trust_remote_code: true
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
is_falcon_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
gptq: false
|
||||
@@ -38,8 +38,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.00003
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
@@ -60,5 +60,5 @@ fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
pad_token: "<|endoftext|>"
|
||||
bos_token: ">>ABSTRACT<<"
|
||||
bos_token: "<|endoftext|>"
|
||||
eos_token: "<|endoftext|>"
|
||||
|
||||
65
examples/gemma/qlora.yml
Normal file
65
examples/gemma/qlora.yml
Normal file
@@ -0,0 +1,65 @@
|
||||
# use google/gemma-7b if you have access
|
||||
base_model: mhenrichsen/gemma-7b
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
# huggingface repo
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
val_set_size: 0.1
|
||||
output_dir: ./out
|
||||
|
||||
adapter: qlora
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: false
|
||||
pad_to_sequence_len: true
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
|
||||
gradient_accumulation_steps: 3
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
|
||||
warmup_ratio: 0.1
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
@@ -33,8 +33,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.0001
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
|
||||
@@ -31,7 +31,7 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.00003
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
bf16: auto
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
@@ -41,8 +40,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -62,11 +61,8 @@ evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed: #deepspeed/zero2.json # multi-gpu only
|
||||
deepspeed: #deepspeed_configs/zero2.json # multi-gpu only
|
||||
weight_decay: 0.1
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
base_model: TheBloke/Llama-2-7B-GPTQ
|
||||
is_llama_derived_model: false
|
||||
gptq: true
|
||||
gptq_disable_exllama: true
|
||||
model_type: AutoModelForCausalLM
|
||||
|
||||
69
examples/llama-2/loftq.yml
Normal file
69
examples/llama-2/loftq.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path:
|
||||
val_set_size: 0.05
|
||||
output_dir: ./lora-out
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
peft:
|
||||
loftq_config:
|
||||
loftq_bits: 4
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
s2_attention:
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
@@ -41,8 +40,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -52,11 +51,12 @@ local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
s2_attention:
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
@@ -64,6 +64,3 @@ weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
76
examples/llama-2/qlora-fsdp.yml
Normal file
76
examples/llama-2/qlora-fsdp.yml
Normal file
@@ -0,0 +1,76 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: yahma/alpaca-cleaned
|
||||
type: alpaca
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.05
|
||||
output_dir: ./qlora-out
|
||||
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 512
|
||||
sample_packing: false
|
||||
pad_to_sequence_len: true
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_modules:
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 4
|
||||
num_epochs: 4
|
||||
optimizer: adamw_torch
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.00001
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
- full_shard
|
||||
fsdp_config:
|
||||
fsdp_limit_all_gathers: true
|
||||
fsdp_sync_module_states: true
|
||||
fsdp_offload_params: true
|
||||
fsdp_use_orig_params: false
|
||||
fsdp_cpu_ram_efficient_loading: true
|
||||
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
|
||||
fsdp_state_dict_type: SHARDED_STATE_DICT
|
||||
special_tokens:
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -65,6 +64,3 @@ weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -47,8 +47,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -34,8 +34,8 @@ learning_rate: 5e-5
|
||||
train_on_inputs: false
|
||||
group_by_length: true
|
||||
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing: false
|
||||
@@ -49,7 +49,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -8,5 +8,5 @@ accelerate launch -m axolotl.cli.train examples/mistral/config.yml
|
||||
|
||||
If you run into CUDA OOM, use deepspeed with config zero2.json:
|
||||
```shell
|
||||
accelerate launch -m axolotl.cli.train examples/mistral/config.yml --deepspeed deepspeed/zero2.json
|
||||
accelerate launch -m axolotl.cli.train examples/mistral/config.yml --deepspeed deepspeed_configs/zero2.json
|
||||
```
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_mistral_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
@@ -34,8 +33,8 @@ learning_rate: 0.000005
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -49,7 +48,7 @@ flash_attention: true
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
@@ -57,6 +56,3 @@ weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
79
examples/mistral/lora-mps.yml
Normal file
79
examples/mistral/lora-mps.yml
Normal file
@@ -0,0 +1,79 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0
|
||||
output_dir: ./lora-out
|
||||
eval_sample_packing: false
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
lora_target_modules:
|
||||
- gate_proj
|
||||
- down_proj
|
||||
- up_proj
|
||||
- q_proj
|
||||
- v_proj
|
||||
- k_proj
|
||||
- o_proj
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 8
|
||||
micro_batch_size: 1
|
||||
num_epochs: 2
|
||||
optimizer: adamw_torch
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16: false
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: false
|
||||
sdp_attention: true
|
||||
|
||||
loss_watchdog_threshold: 5.0
|
||||
loss_watchdog_patience: 3
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
77
examples/mistral/lora.yml
Normal file
77
examples/mistral/lora.yml
Normal file
@@ -0,0 +1,77 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.1
|
||||
output_dir: ./lora-out
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 8192
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
lora_target_modules:
|
||||
- gate_proj
|
||||
- down_proj
|
||||
- up_proj
|
||||
- q_proj
|
||||
- v_proj
|
||||
- k_proj
|
||||
- o_proj
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 2
|
||||
num_epochs: 1
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
|
||||
loss_watchdog_threshold: 5.0
|
||||
loss_watchdog_patience: 3
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
74
examples/mistral/mixtral-qlora-fsdp.yml
Normal file
74
examples/mistral/mixtral-qlora-fsdp.yml
Normal file
@@ -0,0 +1,74 @@
|
||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: tatsu-lab/alpaca
|
||||
type: alpaca
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.02
|
||||
output_dir: ./qlora-out
|
||||
|
||||
model_config:
|
||||
output_router_logits: true
|
||||
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 1024
|
||||
sample_packing: false
|
||||
pad_to_sequence_len: false
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 2
|
||||
num_epochs: 1
|
||||
optimizer: paged_adamw_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
|
||||
loss_watchdog_threshold: 5.0
|
||||
loss_watchdog_patience: 3
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
- full_shard
|
||||
fsdp_config:
|
||||
fsdp_transformer_layer_cls_to_wrap: MixtralSparseMoeBlock
|
||||
special_tokens:
|
||||
@@ -16,12 +16,12 @@ output_dir: ./qlora-out
|
||||
|
||||
## You can optionally freeze the entire model and unfreeze a subset of parameters
|
||||
unfrozen_parameters:
|
||||
# - lm_head.*
|
||||
# - model.embed_tokens.*
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.gate.*
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.experts.*
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.gate.*
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.experts.*
|
||||
# - ^lm_head.weight$
|
||||
# - ^model.embed_tokens.weight$[:32000]
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.gate
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.experts
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.gate
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.experts
|
||||
|
||||
model_config:
|
||||
output_router_logits: true
|
||||
@@ -63,8 +63,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -81,10 +81,10 @@ loss_watchdog_patience: 3
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed: deepspeed/zero2.json
|
||||
deepspeed: deepspeed_configs/zero2.json
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_mistral_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -50,8 +49,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
@@ -68,7 +67,7 @@ loss_watchdog_patience: 3
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
@@ -76,6 +75,3 @@ weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
@@ -33,7 +33,7 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.0000002
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
bf16: auto
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
|
||||
@@ -52,6 +52,7 @@ logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
gptq_groupsize:
|
||||
s2_attention:
|
||||
gptq_model_v1:
|
||||
warmup_steps: 20
|
||||
evals_per_epoch: 4
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
Due to some nuances with the phi code, please use deepspeed when training phi for full finetune.
|
||||
|
||||
```shell
|
||||
accelerate launch -m axolotl.cli.train examples/phi/phi-ft.yml --deepspeed deepspeed/zero1.json
|
||||
accelerate launch -m axolotl.cli.train examples/phi/phi-ft.yml --deepspeed deepspeed_configs/zero1.json
|
||||
|
||||
# OR
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
base_model: microsoft/phi-1_5
|
||||
model_type: PhiForCausalLM
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
is_llama_derived_model: false
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
@@ -18,7 +16,7 @@ output_dir: ./phi-sft-out
|
||||
|
||||
sequence_len: 2048
|
||||
sample_packing: true
|
||||
pad_to_sequence_len:
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter:
|
||||
lora_model_dir:
|
||||
@@ -35,7 +33,7 @@ wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
micro_batch_size: 1
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: adamw_torch
|
||||
adam_beta2: 0.95
|
||||
@@ -45,18 +43,20 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.000003
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: true
|
||||
bf16: true
|
||||
fp16: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing:
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: True
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention:
|
||||
flash_attention: true
|
||||
|
||||
warmup_steps: 100
|
||||
evals_per_epoch: 4
|
||||
@@ -68,7 +68,4 @@ fsdp:
|
||||
fsdp_config:
|
||||
resize_token_embeddings_to_32x: true
|
||||
special_tokens:
|
||||
bos_token: "<|endoftext|>"
|
||||
eos_token: "<|endoftext|>"
|
||||
unk_token: "<|endoftext|>"
|
||||
pad_token: "<|endoftext|>"
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
base_model: microsoft/phi-1_5
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
is_llama_derived_model: false
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -16,9 +14,9 @@ dataset_prepared_path:
|
||||
val_set_size: 0.05
|
||||
output_dir: ./phi-sft-out
|
||||
|
||||
sequence_len: 1024
|
||||
sample_packing: false # not CURRENTLY compatible with LoRAs
|
||||
pad_to_sequence_len:
|
||||
sequence_len: 2048
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
@@ -35,7 +33,7 @@ wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
micro_batch_size: 1
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: adamw_torch
|
||||
adam_beta2: 0.95
|
||||
@@ -45,18 +43,20 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.000003
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: true
|
||||
bf16: true
|
||||
fp16: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing:
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: True
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention:
|
||||
flash_attention: true
|
||||
|
||||
warmup_steps: 100
|
||||
evals_per_epoch: 4
|
||||
@@ -68,7 +68,4 @@ fsdp:
|
||||
fsdp_config:
|
||||
resize_token_embeddings_to_32x: true
|
||||
special_tokens:
|
||||
bos_token: "<|endoftext|>"
|
||||
eos_token: "<|endoftext|>"
|
||||
unk_token: "<|endoftext|>"
|
||||
pad_token: "<|endoftext|>"
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
base_model: microsoft/phi-2
|
||||
model_revision: 834565c # pin model repo to the previous architecture
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
@@ -17,19 +15,16 @@ val_set_size: 0.05
|
||||
output_dir: ./phi-sft-out
|
||||
|
||||
sequence_len: 2048
|
||||
sample_packing: false # currently unsupported
|
||||
pad_to_sequence_len:
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter:
|
||||
lora_model_dir:
|
||||
lora_r: 16
|
||||
lora_alpha: 32
|
||||
lora_dropout: 0.1
|
||||
lora_target_linear: true
|
||||
lora_r:
|
||||
lora_alpha:
|
||||
lora_dropout:
|
||||
lora_target_linear:
|
||||
lora_fan_in_fan_out:
|
||||
lora_modules_to_save:
|
||||
- embd
|
||||
- lm_head
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
@@ -38,22 +33,24 @@ wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
micro_batch_size: 1
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: paged_adamw_8bit
|
||||
optimizer: adamw_torch
|
||||
adam_beta2: 0.95
|
||||
adam_epsilon: 0.00001
|
||||
max_grad_norm: 1.0
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 1e-5
|
||||
learning_rate: 0.000003
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: True
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
|
||||
@@ -27,7 +27,7 @@ num_epochs: 4
|
||||
learning_rate: 0.00001
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
bf16: auto
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
|
||||
@@ -2,7 +2,6 @@ base_model: Qwen/Qwen-7B
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
|
||||
is_qwen_derived_model: true
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: false
|
||||
@@ -58,7 +57,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -2,7 +2,6 @@ base_model: Qwen/Qwen-7B
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
|
||||
is_qwen_derived_model: true
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: false
|
||||
@@ -58,7 +57,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -34,7 +34,7 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.0000002
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
bf16: auto
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
|
||||
@@ -33,7 +33,7 @@ lr_scheduler:
|
||||
learning_rate: 0.00001
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
bf16: auto
|
||||
tf32: true
|
||||
gradient_checkpointing:
|
||||
early_stopping_patience:
|
||||
|
||||
69
examples/stablelm-2/1.6b/fft.yml
Normal file
69
examples/stablelm-2/1.6b/fft.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
base_model: stabilityai/stablelm-2-1_6b
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.05
|
||||
output_dir: ./out
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter:
|
||||
lora_model_dir:
|
||||
lora_r:
|
||||
lora_alpha:
|
||||
lora_dropout:
|
||||
lora_target_linear:
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
micro_batch_size: 1
|
||||
num_epochs: 1
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
flash_attn_cross_entropy: false
|
||||
flash_attn_rms_norm: true
|
||||
flash_attn_fuse_qkv: false
|
||||
flash_attn_fuse_mlp: true
|
||||
|
||||
warmup_steps: 100
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed: #deepspeed_configs/zero2.json # multi-gpu only
|
||||
weight_decay: 0.1
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
66
examples/stablelm-2/1.6b/lora.yml
Normal file
66
examples/stablelm-2/1.6b/lora.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
base_model: stabilityai/stablelm-2-1_6b
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path:
|
||||
val_set_size: 0.05
|
||||
output_dir: ./lora-out
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 1
|
||||
micro_batch_size: 1
|
||||
num_epochs: 1
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
flash_attn_cross_entropy: false
|
||||
flash_attn_rms_norm: true
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
36
examples/stablelm-2/README.md
Normal file
36
examples/stablelm-2/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# StableLM 2
|
||||
|
||||
This repository contains examples for training and processing using StableLM-2. It also includes a section to help you estimate the GPU requirements for your specific use case.
|
||||
|
||||
## Estimating GPU Requirements
|
||||
|
||||
| type | deepspeed | batch size | context length | vRAM GPU (GBs) |
|
||||
|---------------|-----------|------------|----------------|----------------|
|
||||
| full finetune | N/A | 1 | 4096 | ~21.5GBs |
|
||||
| full finetune | zero2 | 1 | 4096 | ~20GBs |
|
||||
| lora | N/A | 1 | 4096 | ~16.6GBs |
|
||||
|
||||
The above are estimates and might differ slight depending on the setup for example whether you pack your sequence lengths or not (the above assumes you do to length 4096).
|
||||
|
||||
This blog post from Hamel Husain was a great resource for estimating these numbers: https://hamel.dev/notes/llm/03_estimating_vram.html
|
||||
|
||||
## Training
|
||||
We have example scripts here for both full finetuning and lora using the popular alpaca dataset:
|
||||
|
||||
```shell
|
||||
# preprocess the dataset
|
||||
CUDA_VISIBLE_DEVICES="" python -m axolotl.cli.preprocess examples/stablelm-2/1.6b/lora.yml
|
||||
```
|
||||
|
||||
Single GPU Training:
|
||||
```shell
|
||||
python -m axolotl.cli.train examples/stablelm-2/fft.yml --deepspeed deepspeed_configs/zero2.json
|
||||
# OR
|
||||
python -m axolotl.cli.train examples/stablelm-2/1.6b/lora.yml
|
||||
```
|
||||
|
||||
Multinode GPU Training with `accelerate`:
|
||||
```shell
|
||||
# make sure you've configured accelerate properly
|
||||
accelerate launch -m axolotl.cli.train examples/stablelm-2/1.6b/fft.yml --deepspeed deepspeed_configs/zero2.json
|
||||
```
|
||||
69
examples/starcoder2/qlora.yml
Normal file
69
examples/starcoder2/qlora.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
base_model: bigcode/starcoder2-3b
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
|
||||
|
||||
dataset_prepared_path:
|
||||
val_set_size: 0.2
|
||||
output_dir: ./qlora
|
||||
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 8192
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_modules:
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_run_id:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 8
|
||||
micro_batch_size: 2
|
||||
num_epochs: 3
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 2e-5
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16: false
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: true
|
||||
|
||||
warmup_steps: 20
|
||||
evals_per_epoch: 4
|
||||
eval_steps:
|
||||
eval_table_size:
|
||||
saves_per_epoch: 4
|
||||
save_steps:
|
||||
save_total_limit: 2
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay:
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
64
examples/tiny-llama/lora-mps.yml
Normal file
64
examples/tiny-llama/lora-mps.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
datasets:
|
||||
- path: mhenrichsen/alpaca_2k_test
|
||||
type: alpaca
|
||||
dataset_prepared_path:
|
||||
val_set_size: 0
|
||||
output_dir: ./lora-out
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
pad_to_sequence_len: true
|
||||
eval_sample_packing: false
|
||||
|
||||
adapter: lora
|
||||
lora_model_dir:
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_fan_in_fan_out:
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 2
|
||||
num_epochs: 4
|
||||
optimizer: adamw_torch
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: auto
|
||||
fp16: false
|
||||
tf32: true
|
||||
|
||||
gradient_checkpointing: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
logging_steps: 1
|
||||
xformers_attention:
|
||||
flash_attention: false
|
||||
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 0
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
weight_decay: 0.0
|
||||
fsdp:
|
||||
fsdp_config:
|
||||
special_tokens:
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
@@ -16,6 +15,7 @@ output_dir: ./lora-out
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
eval_sample_packing: false
|
||||
pad_to_sequence_len: true
|
||||
|
||||
adapter: lora
|
||||
@@ -41,8 +41,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -2,7 +2,6 @@ base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
||||
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
@@ -12,6 +11,7 @@ max_steps: 200
|
||||
pretraining_dataset:
|
||||
path: c4
|
||||
name: en
|
||||
type: pretrain
|
||||
dataset_prepared_path:
|
||||
val_set_size: 0.0
|
||||
output_dir: ./model-out
|
||||
@@ -34,8 +34,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
@@ -43,8 +42,8 @@ learning_rate: 0.0002
|
||||
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -62,8 +62,8 @@ lr_scheduler: cosine
|
||||
learning_rate: 0.00002
|
||||
train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
gradient_checkpointing: true
|
||||
# stop training after this many evaluation losses have increased in a row
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
base_model: 01-ai/Yi-34B-Chat
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
is_mistral_derived_model: false
|
||||
is_llama_derived_model: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
sequence_len: 1024
|
||||
bf16: true
|
||||
fp16: false
|
||||
bf16: auto
|
||||
fp16:
|
||||
tf32: false
|
||||
flash_attention: true
|
||||
special_tokens:
|
||||
@@ -29,7 +28,7 @@ num_epochs: 1
|
||||
val_set_size: 0.1
|
||||
evals_per_epoch: 5
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
eval_sample_packing: false
|
||||
eval_batch_size: 1
|
||||
|
||||
|
||||
BIN
favicon.jpg
Normal file
BIN
favicon.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.5 KiB |
19
index.qmd
Normal file
19
index.qmd
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
|
||||
```{python}
|
||||
#|output: asis
|
||||
#|echo: false
|
||||
|
||||
# This cell steals the README as the home page for now, but excludes the table of contents (quarto adds its own)
|
||||
import re
|
||||
pattern = re.compile(
|
||||
r"<table>\s*<tr>\s*<td>\s*## Table of Contents.*?</td>\s*</tr>\s*</table>",
|
||||
re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
|
||||
with open('README.md', 'r') as f:
|
||||
txt = f.read()
|
||||
|
||||
cleaned = pattern.sub("", txt)
|
||||
print(cleaned)
|
||||
```
|
||||
@@ -1,3 +1,4 @@
|
||||
pre-commit
|
||||
black
|
||||
mypy
|
||||
types-requests
|
||||
|
||||
@@ -1,35 +1,34 @@
|
||||
--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
|
||||
packaging==23.2
|
||||
peft==0.7.0
|
||||
transformers @ git+https://github.com/huggingface/transformers.git@3cefac1d974db5e2825a0cb2b842883a628be7a0
|
||||
peft==0.9.0
|
||||
transformers @ git+https://github.com/huggingface/transformers.git@73a73b415e36f41481369f6129cb4b62bb127a78
|
||||
tokenizers==0.15.0
|
||||
bitsandbytes>=0.41.1
|
||||
accelerate @ git+https://github.com/huggingface/accelerate.git@0d2280dadc6a93413a5496613b7fdda3a4d2551b
|
||||
deepspeed
|
||||
bitsandbytes==0.43.0
|
||||
accelerate==0.28.0
|
||||
deepspeed==0.13.1
|
||||
pydantic==2.6.3
|
||||
addict
|
||||
fire
|
||||
PyYAML>=6.0
|
||||
requests
|
||||
datasets>=2.15.0
|
||||
flash-attn==2.3.3
|
||||
flash-attn==2.5.5
|
||||
sentencepiece
|
||||
wandb
|
||||
einops
|
||||
xformers==0.0.22
|
||||
optimum==1.13.2
|
||||
optimum==1.16.2
|
||||
hf_transfer
|
||||
colorama
|
||||
numba
|
||||
numpy>=1.24.4
|
||||
mlflow
|
||||
# qlora things
|
||||
bert-score==0.3.13
|
||||
evaluate==0.4.0
|
||||
rouge-score==0.1.2
|
||||
evaluate==0.4.1
|
||||
scipy
|
||||
scikit-learn==1.2.2
|
||||
pynvml
|
||||
art
|
||||
fschat==0.2.34
|
||||
fschat==0.2.36
|
||||
gradio==3.50.2
|
||||
tensorboard
|
||||
|
||||
@@ -40,4 +39,4 @@ s3fs
|
||||
gcsfs
|
||||
# adlfs
|
||||
|
||||
trl>=0.7.9
|
||||
trl @ git+https://github.com/huggingface/trl.git@304e208f778a5442c30cdda500348226cdc97d90
|
||||
|
||||
40
scripts/cloud-entrypoint.sh
Executable file
40
scripts/cloud-entrypoint.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Export specific ENV variables to /etc/rp_environment
|
||||
echo "Exporting environment variables..."
|
||||
printenv | grep -E '^RUNPOD_|^PATH=|^_=' | sed 's/^\(.*\)=\(.*\)$/export \1="\2"/' >> /etc/rp_environment
|
||||
echo 'source /etc/rp_environment' >> ~/.bashrc
|
||||
|
||||
if [[ $PUBLIC_KEY ]]; then
|
||||
# runpod
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
echo $PUBLIC_KEY >> ~/.ssh/authorized_keys
|
||||
chmod 700 -R ~/.ssh
|
||||
# Start the SSH service in the background
|
||||
service ssh start
|
||||
elif [ -n "$SSH_KEY" ]; then
|
||||
# latitude.sh
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
echo $SSH_KEY >> ~/.ssh/authorized_keys
|
||||
chmod 700 -R ~/.ssh
|
||||
# Start the SSH service in the background
|
||||
service ssh start
|
||||
else
|
||||
echo "No PUBLIC_KEY or SSH_KEY environment variable provided, not starting openSSH daemon"
|
||||
fi
|
||||
|
||||
# Check if JUPYTER_PASSWORD is set and not empty
|
||||
if [ -n "$JUPYTER_PASSWORD" ]; then
|
||||
# Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
|
||||
export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
|
||||
fi
|
||||
|
||||
if [ "$JUPYTER_DISABLE" != "1" ]; then
|
||||
# Run Jupyter Lab in the background
|
||||
jupyter lab --port=8888 --ip=* --allow-root --ServerApp.allow_origin=* --ServerApp.preferred_dir=/workspace &
|
||||
fi
|
||||
|
||||
# Execute the passed arguments (CMD)
|
||||
exec "$@"
|
||||
17
scripts/motd
Normal file
17
scripts/motd
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
dP dP dP
|
||||
88 88 88
|
||||
.d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88
|
||||
88' `88 `8bd8' 88' `88 88 88' `88 88 88
|
||||
88. .88 .d88b. 88. .88 88 88. .88 88 88
|
||||
`88888P8 dP' `dP `88888P' dP `88888P' dP dP
|
||||
|
||||
Welcome to the axolotl cloud image! If the you've mounted a disk to /workspace and the axolotl directory ie empty, run the following commands:
|
||||
|
||||
```
|
||||
cd /workspace
|
||||
rm -rf /workspace/axolotl
|
||||
git clone https://github.com/OpenAccess-AI-Collective/axolotl.git
|
||||
cd axolotl
|
||||
pip install --no-deps -e .
|
||||
```
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Export specific ENV variables to /etc/rp_environment
|
||||
echo "Exporting environment variables..."
|
||||
printenv | grep -E '^RUNPOD_|^PATH=|^_=' | sed 's/^\(.*\)=\(.*\)$/export \1="\2"/' >> /etc/rp_environment
|
||||
echo 'source /etc/rp_environment' >> ~/.bashrc
|
||||
|
||||
if [[ $PUBLIC_KEY ]]
|
||||
then
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
echo $PUBLIC_KEY >> ~/.ssh/authorized_keys
|
||||
chmod 700 -R ~/.ssh
|
||||
# Start the SSH service in the background
|
||||
service ssh start
|
||||
else
|
||||
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
||||
fi
|
||||
|
||||
# Execute the passed arguments (CMD)
|
||||
exec "$@"
|
||||
40
setup.py
40
setup.py
@@ -1,5 +1,7 @@
|
||||
"""setup.py for axolotl"""
|
||||
|
||||
import platform
|
||||
import re
|
||||
from importlib.metadata import PackageNotFoundError, version
|
||||
|
||||
from setuptools import find_packages, setup
|
||||
@@ -16,6 +18,7 @@ def parse_requirements():
|
||||
or "flash-attention" in line
|
||||
or "deepspeed" in line
|
||||
or "mamba-ssm" in line
|
||||
or "lion-pytorch" in line
|
||||
)
|
||||
if line.startswith("--extra-index-url"):
|
||||
# Handle custom index URLs
|
||||
@@ -26,10 +29,25 @@ def parse_requirements():
|
||||
_install_requires.append(line)
|
||||
|
||||
try:
|
||||
torch_version = version("torch")
|
||||
if torch_version.startswith("2.1.1"):
|
||||
if "Darwin" in platform.system():
|
||||
_install_requires.pop(_install_requires.index("xformers==0.0.22"))
|
||||
_install_requires.append("xformers==0.0.23")
|
||||
else:
|
||||
torch_version = version("torch")
|
||||
_install_requires.append(f"torch=={torch_version}")
|
||||
|
||||
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
||||
if version_match:
|
||||
major, minor, patch = version_match.groups()
|
||||
major, minor = int(major), int(minor)
|
||||
patch = (
|
||||
int(patch) if patch is not None else 0
|
||||
) # Default patch to 0 if not present
|
||||
else:
|
||||
raise ValueError("Invalid version format")
|
||||
|
||||
if (major, minor) >= (2, 1):
|
||||
_install_requires.pop(_install_requires.index("xformers==0.0.22"))
|
||||
_install_requires.append("xformers>=0.0.23")
|
||||
except PackageNotFoundError:
|
||||
pass
|
||||
|
||||
@@ -41,7 +59,7 @@ install_requires, dependency_links = parse_requirements()
|
||||
|
||||
setup(
|
||||
name="axolotl",
|
||||
version="0.3.0",
|
||||
version="0.4.0",
|
||||
description="LLM Trainer",
|
||||
long_description="Axolotl is a tool designed to streamline the fine-tuning of various AI models, offering support for multiple configurations and architectures.",
|
||||
package_dir={"": "src"},
|
||||
@@ -50,13 +68,14 @@ setup(
|
||||
dependency_links=dependency_links,
|
||||
extras_require={
|
||||
"flash-attn": [
|
||||
"flash-attn==2.3.3",
|
||||
"flash-attn==2.5.5",
|
||||
],
|
||||
"fused-dense-lib": [
|
||||
"fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.3.3#subdirectory=csrc/fused_dense_lib",
|
||||
],
|
||||
"deepspeed": [
|
||||
"deepspeed",
|
||||
"deepspeed==0.13.1",
|
||||
"deepspeed-kernels",
|
||||
],
|
||||
"mamba-ssm": [
|
||||
"mamba-ssm==1.0.1",
|
||||
@@ -64,5 +83,14 @@ setup(
|
||||
"auto-gptq": [
|
||||
"auto-gptq==0.5.1",
|
||||
],
|
||||
"mlflow": [
|
||||
"mlflow",
|
||||
],
|
||||
"lion-pytorch": [
|
||||
"lion-pytorch==0.1.2",
|
||||
],
|
||||
"galore": [
|
||||
"galore_torch",
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1,26 +1,29 @@
|
||||
"""Prepare and train a model on a dataset. Can also infer from a model or merge lora"""
|
||||
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import gradio as gr
|
||||
import requests
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
# add src to the pythonpath so we don't need to pip install this
|
||||
from accelerate.commands.config import config_args
|
||||
from art import text2art
|
||||
from datasets import concatenate_datasets, load_dataset
|
||||
from huggingface_hub import HfApi
|
||||
from huggingface_hub.utils import LocalTokenNotFoundError
|
||||
from transformers import GenerationConfig, TextIteratorStreamer, TextStreamer
|
||||
from transformers.utils import is_torch_bf16_gpu_available
|
||||
|
||||
from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer
|
||||
from axolotl.logging_config import configure_logging
|
||||
@@ -30,7 +33,7 @@ from axolotl.utils.config import (
|
||||
normalize_config,
|
||||
validate_config,
|
||||
)
|
||||
from axolotl.utils.data import prepare_dataset
|
||||
from axolotl.utils.data import load_prepare_dpo_datasets, prepare_dataset
|
||||
from axolotl.utils.dict import DictDefault
|
||||
from axolotl.utils.distributed import is_main_process
|
||||
from axolotl.utils.mlflow_ import setup_mlflow_env_vars
|
||||
@@ -60,6 +63,52 @@ def print_axolotl_text_art(suffix=None):
|
||||
print(ascii_art)
|
||||
|
||||
|
||||
def check_remote_config(config: Union[str, Path]):
|
||||
# Check if the config is a valid HTTPS URL to a .yml or .yaml file
|
||||
if not (isinstance(config, str) and config.startswith("https://")):
|
||||
return config # Return the original value if it's not a valid URL
|
||||
|
||||
filename = os.path.basename(urlparse(config).path)
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
|
||||
try:
|
||||
response = requests.get(config, timeout=30)
|
||||
response.raise_for_status() # Check for HTTP errors
|
||||
|
||||
content = response.content
|
||||
try:
|
||||
# Try parsing as JSON first to catch cases where JSON content is mistakenly considered YAML
|
||||
json.loads(content)
|
||||
# Log a warning but do not raise an error; JSON is technically valid YAML - this can happen when you forget to point to a raw github link
|
||||
LOG.warning(
|
||||
f"Warning: The content of the file at {config} is JSON, which is technically valid YAML but might not be intended."
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
# If it's not valid JSON, verify it's valid YAML
|
||||
try:
|
||||
yaml.safe_load(content)
|
||||
except yaml.YAMLError as err:
|
||||
raise ValueError(
|
||||
f"Failed to parse the content at {config} as YAML: {err}"
|
||||
) from err
|
||||
|
||||
# Write the content to a file if it's valid YAML (or JSON treated as YAML)
|
||||
output_path = Path(temp_dir) / filename
|
||||
with open(output_path, "wb") as file:
|
||||
file.write(content)
|
||||
LOG.info(
|
||||
f"Using the following config obtained from {config}:\n\n{content.decode('utf-8')}\n"
|
||||
)
|
||||
return output_path
|
||||
|
||||
except requests.RequestException as err:
|
||||
# This catches all requests-related exceptions including HTTPError
|
||||
raise RuntimeError(f"Failed to download {config}: {err}") from err
|
||||
except Exception as err:
|
||||
# Catch-all for any other exceptions
|
||||
raise err
|
||||
|
||||
|
||||
def get_multi_line_input() -> Optional[str]:
|
||||
print("Give me an instruction (Ctrl + D to submit): ")
|
||||
instruction = ""
|
||||
@@ -79,7 +128,11 @@ def do_merge_lora(
|
||||
|
||||
LOG.info("running merge of LoRA with base model")
|
||||
model = model.merge_and_unload(progressbar=True)
|
||||
model.to(dtype=cfg.torch_dtype)
|
||||
try:
|
||||
model.to(dtype=cfg.torch_dtype)
|
||||
except RuntimeError:
|
||||
pass
|
||||
model.generation_config.do_sample = True
|
||||
|
||||
if cfg.local_rank == 0:
|
||||
LOG.info(f"saving merged model to: {str(Path(cfg.output_dir) / 'merged')}")
|
||||
@@ -161,6 +214,8 @@ def do_inference_gradio(
|
||||
cfg: DictDefault,
|
||||
cli_args: TrainerCliArgs,
|
||||
):
|
||||
import gradio as gr
|
||||
|
||||
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
|
||||
prompter = cli_args.prompter
|
||||
default_tokens = {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>"}
|
||||
@@ -267,14 +322,14 @@ def check_not_in(list1: List[str], list2: Union[Dict[str, Any], List[str]]) -> b
|
||||
return not any(el in list2 for el in list1)
|
||||
|
||||
|
||||
def load_cfg(config: Path = Path("examples/"), **kwargs):
|
||||
def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs):
|
||||
config = check_remote_config(config)
|
||||
if Path(config).is_dir():
|
||||
config = choose_config(config)
|
||||
config = choose_config(Path(config))
|
||||
|
||||
# load the config from the yaml file
|
||||
with open(config, encoding="utf-8") as file:
|
||||
cfg: DictDefault = DictDefault(yaml.safe_load(file))
|
||||
cfg.axolotl_config_path = config
|
||||
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
|
||||
# then overwrite the value
|
||||
cfg_keys = cfg.keys()
|
||||
@@ -287,7 +342,22 @@ def load_cfg(config: Path = Path("examples/"), **kwargs):
|
||||
else:
|
||||
cfg[k] = kwargs[k]
|
||||
|
||||
validate_config(cfg)
|
||||
cfg.axolotl_config_path = config
|
||||
|
||||
try:
|
||||
device_props = torch.cuda.get_device_properties("cuda")
|
||||
gpu_version = "sm_" + str(device_props.major) + str(device_props.minor)
|
||||
except: # pylint: disable=bare-except # noqa: E722
|
||||
gpu_version = None
|
||||
|
||||
cfg = validate_config(
|
||||
cfg,
|
||||
capabilities={
|
||||
"bf16": is_torch_bf16_gpu_available(),
|
||||
"n_gpu": os.environ.get("WORLD_SIZE", 1),
|
||||
"compute_capability": gpu_version,
|
||||
},
|
||||
)
|
||||
|
||||
prepare_optim_env(cfg)
|
||||
|
||||
@@ -343,78 +413,7 @@ def load_rl_datasets(
|
||||
cfg: DictDefault,
|
||||
cli_args: TrainerCliArgs, # pylint: disable=unused-argument
|
||||
) -> TrainDatasetMeta:
|
||||
train_datasets: List[Any] = []
|
||||
for i, ds_cfg in enumerate(cfg.datasets):
|
||||
train_datasets.insert(i, load_dataset(ds_cfg["path"], split=ds_cfg["split"]))
|
||||
# eval_dataset = load_dataset(
|
||||
# cfg.test_datasets[0]["path"], split=cfg.test_datasets[0]["split"]
|
||||
# )
|
||||
eval_dataset = None
|
||||
|
||||
def argilla_apply_chatml(sample): # pylint: disable=possibly-unused-variable
|
||||
if "system" in sample and sample["system"]:
|
||||
sample["prompt"] = (
|
||||
f"<|im_start|>system\n{sample['system']}<|im_end|>\n"
|
||||
f"<|im_start|>user\n{sample['instruction']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
)
|
||||
else:
|
||||
sample[
|
||||
"prompt"
|
||||
] = f"<|im_start|>user\n{sample['instruction']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
sample["chosen"] = f"{sample['chosen_response']}<|im_end|>"
|
||||
sample["rejected"] = f"{sample['rejected_response']}<|im_end|>"
|
||||
return sample
|
||||
|
||||
def intel_apply_chatml(sample): # pylint: disable=possibly-unused-variable
|
||||
if "system" in sample and sample["system"]:
|
||||
sample["prompt"] = (
|
||||
f"<|im_start|>system\n{sample['system']}<|im_end|>\n"
|
||||
f"<|im_start|>user\n{sample['question']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
)
|
||||
else:
|
||||
sample[
|
||||
"prompt"
|
||||
] = f"<|im_start|>user\n{sample['question']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
sample["chosen"] = f"{sample['chosen']}<|im_end|>"
|
||||
sample["rejected"] = f"{sample['rejected']}<|im_end|>"
|
||||
return sample
|
||||
|
||||
def apply_chatml(sample): # pylint: disable=possibly-unused-variable
|
||||
if "system" in sample and sample["system"]:
|
||||
sample["prompt"] = (
|
||||
f"<|im_start|>system\n{sample['system']}<|im_end|>\n"
|
||||
f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
)
|
||||
else:
|
||||
sample[
|
||||
"prompt"
|
||||
] = f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
sample["chosen"] = f"{sample['chosen']}<|im_end|>"
|
||||
sample["rejected"] = f"{sample['rejected']}<|im_end|>"
|
||||
return sample
|
||||
|
||||
def ultra_apply_chatml(sample): # pylint: disable=possibly-unused-variable
|
||||
if "system" in sample and sample["system"]:
|
||||
sample["prompt"] = (
|
||||
f"<|im_start|>system\n{sample['system']}<|im_end|>\n"
|
||||
f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
)
|
||||
else:
|
||||
sample[
|
||||
"prompt"
|
||||
] = f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n"
|
||||
sample["chosen"] = f"{sample['chosen'][1]['content']}<|im_end|>"
|
||||
sample["rejected"] = f"{sample['rejected'][1]['content']}<|im_end|>"
|
||||
return sample
|
||||
|
||||
for i, data_set in enumerate(train_datasets):
|
||||
_type = cfg.datasets[i]["type"]
|
||||
ds_type_fn = locals()[_type]
|
||||
train_datasets[i] = data_set.map(ds_type_fn)
|
||||
train_dataset = concatenate_datasets(train_datasets)
|
||||
|
||||
# eval_dataset = eval_dataset.map(intel_apply_chatml)
|
||||
|
||||
train_dataset, eval_dataset = load_prepare_dpo_datasets(cfg)
|
||||
total_num_steps = int(
|
||||
math.ceil(len(train_dataset) * cfg.num_epochs / cfg.batch_size)
|
||||
)
|
||||
@@ -434,6 +433,13 @@ def check_accelerate_default_config():
|
||||
|
||||
|
||||
def check_user_token():
|
||||
# Skip check if HF_HUB_OFFLINE is set to True
|
||||
if os.getenv("HF_HUB_OFFLINE") == "1":
|
||||
LOG.info(
|
||||
"Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used."
|
||||
)
|
||||
return True
|
||||
|
||||
# Verify if token is valid
|
||||
api = HfApi()
|
||||
try:
|
||||
|
||||
@@ -3,6 +3,7 @@ CLI to run training on a model
|
||||
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import fire
|
||||
import transformers
|
||||
@@ -13,18 +14,21 @@ from axolotl.cli import (
|
||||
check_user_token,
|
||||
load_cfg,
|
||||
load_datasets,
|
||||
load_rl_datasets,
|
||||
print_axolotl_text_art,
|
||||
)
|
||||
from axolotl.common.cli import PreprocessCliArgs
|
||||
from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH
|
||||
from axolotl.prompt_strategies.sharegpt import register_chatml_template
|
||||
|
||||
LOG = logging.getLogger("axolotl.cli.preprocess")
|
||||
|
||||
|
||||
def do_cli(config: Path = Path("examples/"), **kwargs):
|
||||
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
|
||||
# pylint: disable=duplicate-code
|
||||
print_axolotl_text_art()
|
||||
parsed_cfg = load_cfg(config, **kwargs)
|
||||
parsed_cfg.is_preprocess = True
|
||||
check_accelerate_default_config()
|
||||
check_user_token()
|
||||
parser = transformers.HfArgumentParser((PreprocessCliArgs))
|
||||
@@ -32,6 +36,14 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
|
||||
return_remaining_strings=True
|
||||
)
|
||||
|
||||
if parsed_cfg.chat_template == "chatml" and parsed_cfg.default_system_message:
|
||||
LOG.info(
|
||||
f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
|
||||
)
|
||||
register_chatml_template(parsed_cfg.default_system_message)
|
||||
else:
|
||||
register_chatml_template()
|
||||
|
||||
if not parsed_cfg.dataset_prepared_path:
|
||||
msg = (
|
||||
Fore.RED
|
||||
@@ -42,7 +54,11 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
|
||||
LOG.warning(msg)
|
||||
parsed_cfg.dataset_prepared_path = DEFAULT_DATASET_PREPARED_PATH
|
||||
|
||||
_ = load_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args)
|
||||
if parsed_cfg.rl and parsed_cfg.rl != "orpo":
|
||||
load_rl_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args)
|
||||
else:
|
||||
load_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args)
|
||||
|
||||
LOG.info(
|
||||
Fore.GREEN
|
||||
+ f"Success! Preprocessed data path: `dataset_prepared_path: {parsed_cfg.dataset_prepared_path}`"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user