Compare commits

53 commits: djsaunde-p...pixtral_in
| Author | SHA1 | Date |
|---|---|---|
|  | fbf3ca86c9 |  |
|  | 2de866e92f |  |
|  | 295e07dcca |  |
|  | 3c07b6d6b1 |  |
|  | 89dae7dc6d |  |
|  | 1b54af8e54 |  |
|  | ca7b56cba3 |  |
|  | ea8269d2eb |  |
|  | 13ca7ed087 |  |
|  | 0dfd8541ee |  |
|  | 75e1d3537f |  |
|  | 2b7f3bd6ab |  |
|  | d85a229afe |  |
|  | 355cd7c872 |  |
|  | eab1638686 |  |
|  | a3a4d22709 |  |
|  | f9eb7d8663 |  |
|  | 343771a6d3 |  |
|  | d2c32d0cba |  |
|  | cec9887609 |  |
|  | 88b2cae748 |  |
|  | aea2565938 |  |
|  | 1ad56303b2 |  |
|  | dc055a4ef7 |  |
|  | 169116a50f |  |
|  | 43e412f660 |  |
|  | 7aa57803e1 |  |
|  | 1969fa3bf0 |  |
|  | 4078f37076 |  |
|  | f073af6d99 |  |
|  | 139d2612fa |  |
|  | 20573fd13e |  |
|  | 2b7b4af81c |  |
|  | d56260c8d5 |  |
|  | cac785ec0e |  |
|  | e62991edef |  |
|  | fd9e7b55f6 |  |
|  | c0c53eb62f |  |
|  | b0fbd4d11d |  |
|  | 1a70d4d6a4 |  |
|  | d8787a433f |  |
|  | e775422269 |  |
|  | 97178f5960 |  |
|  | 4698eed43f |  |
|  | f84c3b37e7 |  |
|  | c39971c659 |  |
|  | 33a178c788 |  |
|  | db15605e7e |  |
|  | 9e112bc8b5 |  |
|  | e038410778 |  |
|  | f4385c3cf4 |  |
|  | d58c772df6 |  |
|  | 69265a53b5 |  |
.github/workflows/pypi.yml (7 changes, vendored)

@@ -13,13 +13,10 @@ jobs:
     permissions:
       contents: write
     steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
       - name: Create release
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh release create "$GITHUB_REF_NAME" --generate-notes
+        run: gh release create "$GITHUB_REF_NAME" # GITHUB_REF_NAME is the tag name in `on.push.tags` workflows
   pypi-publish:
     name: Upload release to PyPI
     runs-on: ubuntu-latest
@@ -41,7 +38,7 @@ jobs:
       - name: Install dependencies
         run: |
           pip3 install wheel packaging
-          pip3 install --no-build-isolation -e .
+          pip3 install -e .
           pip3 install -r requirements-dev.txt -r requirements-tests.txt

       - name: Extract tag name
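The head branch's comment notes that `GITHUB_REF_NAME` is the tag name in `on.push.tags` workflows. For readers unfamiliar with that trigger, here is a minimal sketch of such a workflow; the `v*` tag filter and the job layout are illustrative assumptions, not contents of this diff:

```yaml
# Minimal sketch (illustrative): on a push of tag v1.2.3,
# GITHUB_REF is "refs/tags/v1.2.3" and GITHUB_REF_NAME is "v1.2.3".
on:
  push:
    tags:
      - "v*"             # assumed filter; any tag pattern works

jobs:
  release:
    runs-on: ubuntu-latest
    permissions:
      contents: write     # required for `gh release create`
    steps:
      - name: Create release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: gh release create "$GITHUB_REF_NAME" --generate-notes
```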
.github/workflows/tests-nightly.yml (25 changes, vendored)

@@ -23,15 +23,9 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
         pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
     timeout-minutes: 20

     steps:
@@ -44,11 +38,6 @@ jobs:
           python-version: ${{ matrix.python_version }}
           cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools wheel
-
       - name: Install PyTorch
         run: |
           pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu
@@ -65,23 +54,13 @@ jobs:
         run: |
           pip3 install --upgrade pip
           pip3 install --upgrade packaging
-          pip3 install --no-build-isolation -U -e .
+          pip3 install -U -e .
-          python scripts/unsloth_install.py | sh
           python scripts/cutcrossentropy_install.py | sh
           pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
       - name: Run tests
         run: |
-          pytest -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
+          pytest --ignore=tests/e2e/ tests/
-          pytest tests/patched/

       - name: cleanup pip cache
         run: |
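The `exclude:` entries removed above prune combinations from the job matrix after the cross-product is taken. A sketch of how the base branch's strategy resolves:

```yaml
# How the base branch's matrix resolves: the 2x3 cross-product yields
# six jobs; `exclude` then drops two of them.
strategy:
  fail-fast: false
  max-parallel: 2        # at most two matrix jobs run concurrently
  matrix:
    python_version: ["3.10", "3.11"]
    pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
    exclude:
      - python_version: "3.10"
        pytorch_version: "2.4.1"
      - python_version: "3.10"
        pytorch_version: "2.5.1"
# Remaining combinations: (3.10, 2.3.1), (3.11, 2.3.1), (3.11, 2.4.1), (3.11, 2.5.1)
```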
.github/workflows/tests.yml (41 changes, vendored)

@@ -10,7 +10,6 @@ on:
       - '.github/workflows/*.yml'
       - 'requirements-tests.txt'
       - 'cicd/cicd.sh'
-      - 'cicd/Dockerfile.jinja'
   pull_request:
     paths:
       - '**.py'
@@ -18,7 +17,6 @@ on:
       - '.github/workflows/*.yml'
       - 'requirements-tests.txt'
       - 'cicd/cicd.sh'
-      - 'cicd/Dockerfile.jinja'
   workflow_dispatch:

 # Cancel jobs on the same ref if a new one is triggered
@@ -45,15 +43,9 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
         pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
     timeout-minutes: 20

     steps:
@@ -78,23 +70,14 @@ jobs:
       - name: Install dependencies
         run: |
           pip3 show torch
-          pip3 install --no-build-isolation -U -e .
+          pip3 install -U -e .
           python scripts/unsloth_install.py | sh
           python scripts/cutcrossentropy_install.py | sh
           pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
       - name: Run tests
         run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

       - name: cleanup pip cache
         run: |
@@ -105,7 +88,6 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      max-parallel: 1
       matrix:
         python_version: ["3.11"]
         pytorch_version: ["2.4.1", "2.5.1"]
@@ -124,7 +106,7 @@ jobs:
       - name: upgrade pip
         run: |
           pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools setuptools_scm build wheel
+          pip3 install --upgrade packaging setuptools wheel

       - name: Install PyTorch
         run: |
@@ -133,24 +115,13 @@ jobs:
       - name: Install dependencies
         run: |
           pip3 show torch
-          python -m build --no-isolation --sdist
+          python3 setup.py sdist
-          pip3 install --no-build-isolation dist/axolotl*.tar.gz
+          pip3 install dist/axolotl*.tar.gz
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
           pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
       - name: Run tests
         run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

       - name: cleanup pip cache
         run: |
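A note on the `--no-build-isolation` flag the base branch adds throughout this compare: by default pip builds a package in an isolated environment that cannot see the already-installed torch, while `--no-build-isolation` builds against the live environment instead (which is also why these workflows pre-install `packaging`, `setuptools`, and `wheel`). Plausibly for the same reason, the base branch pairs the install with a guard step, reproduced here from the diff above:

```yaml
# Guard step from the base branch: fails the job if any install step
# replaced the PyTorch build selected by the matrix.
- name: Make sure PyTorch version wasn't clobbered
  run: |
    python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
```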
MANIFEST.in

@@ -1,5 +1,4 @@
 include requirements.txt
 include README.md
 include LICENSE
-include src/setuptools_axolotl_dynamic_dependencies.py
 recursive-include axolotl *.py
README.md (289 changes)

@@ -10,13 +10,9 @@
     <img src="https://img.shields.io/github/license/axolotl-ai-cloud/axolotl.svg?color=blue" alt="GitHub License">
     <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests.yml/badge.svg" alt="tests">
     <a href="https://github.com/axolotl-ai-cloud/axolotl/releases"><img src="https://img.shields.io/github/release/axolotl-ai-cloud/axolotl.svg" alt="Releases"></a>
-    <br/>
-    <a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors"><img src="https://img.shields.io/github/contributors-anon/axolotl-ai-cloud/axolotl?color=yellow&style=flat-square" alt="contributors" style="height: 20px;"></a>
     <img src="https://img.shields.io/github/stars/axolotl-ai-cloud/axolotl" alt="GitHub Repo stars">
-    <br/>
-    <a href="https://discord.com/invite/HhrNrHJPRb"><img src="https://img.shields.io/badge/discord-7289da.svg?style=flat-square&logo=discord" alt="discord" style="height: 20px;"></a>
-    <a href="https://twitter.com/axolotl_ai"><img src="https://img.shields.io/twitter/follow/axolotl_ai?style=social" alt="twitter" style="height: 20px;"></a>
-    <br/>
+</p>
+<p align="center">
     <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests-nightly.yml/badge.svg" alt="tests-nightly">
     <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/multi-gpu-e2e.yml/badge.svg" alt="multigpu-semi-weekly tests">
 </p>
@@ -45,13 +41,9 @@ Features:
 ## Table of Contents
 - [Axolotl](#axolotl)
   - [Table of Contents](#table-of-contents)
-  - [Quickstart ⚡](#quickstart-)
-    - [Edge Builds](#edge-builds-)
-    - [Axolotl CLI Usage](#axolotl-cli-usage)
-  - [Badge ❤🏷️](#badge-️)
-  - [Contributing 🤝](#contributing-)
-  - [Sponsors 🤝❤](#sponsors-)
   - [Axolotl supports](#axolotl-supports)
+  - [Quickstart ⚡](#quickstart-)
+    - [Usage](#usage)
   - [Advanced Setup](#advanced-setup)
     - [Environment](#environment)
       - [Docker](#docker)
@@ -83,6 +75,14 @@ Features:
     - [Tokenization Mismatch b/w Inference \& Training](#tokenization-mismatch-bw-inference--training)
   - [Debugging Axolotl](#debugging-axolotl)
   - [Need help? 🙋](#need-help-)
+  - [Badge ❤🏷️](#badge-️)
+  - [Community Showcase](#community-showcase)
+  - [Contributing 🤝](#contributing-)
+  - [Sponsors 🤝❤](#sponsors-)
+    - [💎 Diamond Sponsors - Contact directly](#-diamond-sponsors---contact-directly)
+    - [🥇 Gold Sponsors - $5000/mo](#-gold-sponsors---5000mo)
+    - [🥈 Silver Sponsors - $1000/mo](#-silver-sponsors---1000mo)
+    - [🥉 Bronze Sponsors - $500/mo](#-bronze-sponsors---500mo)

 </td>
 <td>
@@ -105,148 +105,6 @@ Features:
 </tr>
 </table>

-## Quickstart ⚡
-
-Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
-
-**Requirements**: *Nvidia* GPU (Ampere architecture or newer for `bf16` and Flash Attention) or *AMD* GPU, Python >=3.10 and PyTorch >=2.3.1.
-
-```bash
-pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
-
-# download examples and optionally deepspeed configs to the local path
-axolotl fetch examples
-axolotl fetch deepspeed_configs # OPTIONAL
-
-# finetune using lora
-axolotl train examples/llama-3/lora-1b.yml
-```
-
-### Edge Builds 🏎️
-
-If you're looking for the latest features and updates between releases, you'll need to install
-from source.
-
-```bash
-git clone https://github.com/axolotl-ai-cloud/axolotl.git
-cd axolotl
-pip3 install packaging ninja
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
-```
-
-### Axolotl CLI Usage
-We now support a new, more streamlined CLI using [click](https://click.palletsprojects.com/en/stable/).
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" axolotl preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-axolotl train examples/llama-3/lora-1b.yml
-
-# inference
-axolotl inference examples/llama-3/lora-1b.yml \
-  --lora-model-dir="./outputs/lora-out"
-
-# gradio
-axolotl inference examples/llama-3/lora-1b.yml \
-  --lora-model-dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-axolotl train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-We've also added a new command for fetching `examples` and `deepspeed_configs` to your
-local machine. This will come in handy when installing `axolotl` from PyPI.
-
-```bash
-# Fetch example YAML files (stores in "examples/" folder)
-axolotl fetch examples
-
-# Fetch deepspeed config files (stores in "deepspeed_configs/" folder)
-axolotl fetch deepspeed_configs
-
-# Optionally, specify a destination folder
-axolotl fetch examples --dest path/to/folder
-```
-
-### Legacy Usage
-<details>
-
-<summary>Click to Expand</summary>
-
-While the Axolotl CLI is the preferred method for interacting with axolotl, we
-still support the legacy `-m axolotl.cli.*` usage.
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" python -m axolotl.cli.preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-accelerate launch -m axolotl.cli.train examples/llama-3/lora-1b.yml
-
-# inference
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-  --lora_model_dir="./outputs/lora-out"
-
-# gradio
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-  --lora_model_dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-</details>
-
-## Badge ❤🏷️
-
-Building something cool with Axolotl? Consider adding a badge to your model card.
-
-```markdown
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-```
-
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-
-## Sponsors 🤝❤
-
-If you love axolotl, consider sponsoring the project by reaching out directly to [wing@axolotl.ai](mailto:wing@axolotl.ai).
-
----
-
-- [Modal](https://modal.com/) Modal lets you run data/AI jobs in the cloud, by just writing a few lines of Python. Customers use Modal to deploy Gen AI models at large scale, fine-tune LLM models, run protein folding simulations, and much more.
-
----
-
-## Contributing 🤝
-
-Please read the [contributing guide](./.github/CONTRIBUTING.md)
-
-Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
-
-PRs are **greatly welcome**!
-
-Please run the quickstart instructions followed by the below to setup env:
-```bash
-pip3 install -r requirements-dev.txt -r requirements-tests.txt
-pre-commit install
-
-# test
-pytest tests/
-
-# optional: run against all files
-pre-commit run --all-files
-```
-
-Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
-
-<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
-  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
-</a>
-
 ## Axolotl supports

 | | fp16/fp32 | lora | qlora | gptq | gptq w/flash attn | flash attn | xformers attn |
@@ -272,6 +130,41 @@ Thanks to all of our contributors to date. Help drive open source AI progress fo
 ❌: not supported
 ❓: untested

+## Quickstart ⚡
+
+Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
+
+**Requirements**: Nvidia GPU (Ampere architecture or newer for `bf16` and Flash Attention), Python >=3.10 and PyTorch >=2.3.1.
+
+```bash
+git clone https://github.com/axolotl-ai-cloud/axolotl
+cd axolotl
+
+pip3 install packaging ninja
+pip3 install -e '.[flash-attn,deepspeed]'
+```
+
+### Usage
+```bash
+# preprocess datasets - optional but recommended
+CUDA_VISIBLE_DEVICES="0" python -m axolotl.cli.preprocess examples/openllama-3b/lora.yml
+
+# finetune lora
+accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
+
+# inference
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+  --lora_model_dir="./outputs/lora-out"
+
+# gradio
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+  --lora_model_dir="./outputs/lora-out" --gradio
+
+# remote yaml files - the yaml config can be hosted on a public URL
+# Note: the yaml config must directly link to the **raw** yaml
+accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/openllama-3b/lora.yml
+```
+
 ## Advanced Setup

 ### Environment
@@ -320,7 +213,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
 3. Install Axolotl along with python dependencies
   ```bash
   pip3 install packaging
-  pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+  pip3 install -e '.[flash-attn,deepspeed]'
   ```
 4. (Optional) Login to Huggingface to use gated models/datasets.
   ```bash
@@ -399,7 +292,7 @@ Please use WSL or Docker!

 Use the below instead of the install method in QuickStart.
 ```
-pip3 install --no-build-isolation -e '.'
+pip3 install -e '.'
 ```
 More info: [mac.md](/docs/mac.qmd)

@@ -789,6 +682,86 @@ See [this debugging guide](docs/debugging.qmd) for tips on debugging Axolotl, al

 ## Need help? 🙋

-Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.
+Join our [Discord server](https://discord.gg/HhrNrHJPRb) where we our community members can help you.

-Need dedicated support? Please contact us at [✉️wing@axolotl.ai](ailto:wing@axolotl.ai) for dedicated support options.
+Need dedicated support? Please contact us at [✉️wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org) for dedicated support options.

+## Badge ❤🏷️
+
+Building something cool with Axolotl? Consider adding a badge to your model card.
+
+```markdown
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+```
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+
+## Community Showcase
+
+Check out some of the projects and models that have been built using Axolotl! Have a model you'd like to add to our Community Showcase? Open a PR with your model.
+
+Open Access AI Collective
+- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b-fixed)
+- [Manticore 13b](https://huggingface.co/openaccess-ai-collective/manticore-13b)
+- [Hippogriff 30b](https://huggingface.co/openaccess-ai-collective/hippogriff-30b-chat)
+
+PocketDoc Labs
+- [Dan's PersonalityEngine 13b LoRA](https://huggingface.co/PocketDoc/Dans-PersonalityEngine-13b-LoRA)
+
+## Contributing 🤝
+
+Please read the [contributing guide](./.github/CONTRIBUTING.md)
+
+Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
+
+PRs are **greatly welcome**!
+
+Please run the quickstart instructions followed by the below to setup env:
+```bash
+pip3 install -r requirements-dev.txt -r requirements-tests.txt
+pre-commit install
+
+# test
+pytest tests/
+
+# optional: run against all files
+pre-commit run --all-files
+```
+
+Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
+
+<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
+</a>
+
+## Sponsors 🤝❤
+
+OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
+[NanoCode012](https://github.com/NanoCode012), [tmm1](https://github.com/tmm1),
+[mhenrichsen](https://github.com/mhenrichsen), [casper-hansen](https://github.com/casper-hansen),
+[hamelsmu](https://github.com/hamelsmu) and many more who help us accelerate forward by fixing bugs, answering
+community questions and implementing new features. Axolotl needs donations from sponsors for the compute needed to
+run our unit & integration tests, troubleshooting community issues, and providing bounties. If you love axolotl,
+consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsors/OpenAccess-AI-Collective),
+[Ko-fi](https://ko-fi.com/axolotl_ai) or reach out directly to
+[wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org).
+
+---
+
+#### 💎 Diamond Sponsors - [Contact directly](mailto:wing@openaccessaicollective.org)
+
+---
+
+#### 🥇 Gold Sponsors - $5000/mo
+
+---
+
+#### 🥈 Silver Sponsors - $1000/mo
+
+---
+
+#### 🥉 Bronze Sponsors - $500/mo
+
+- [JarvisLabs.ai](https://jarvislabs.ai)
+
+---
cicd/Dockerfile.jinja

@@ -4,6 +4,7 @@ ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
 ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
 ENV CUDA="{{ CUDA }}"
+ENV BNB_CUDA_VERSION="{{ CUDA }}"
 ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
 ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
@@ -31,9 +32,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
     fi

 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
     fi

 RUN python scripts/unsloth_install.py | sh
cicd/cicd.sh

@@ -1,10 +1,6 @@
 #!/bin/bash
 set -e

-python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
-pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
-# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
+pytest -v --durations=10 -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/
+pytest -v --durations=10 -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/
 pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"

 ENV PYTORCH_VERSION=$PYTORCH_VERSION
@@ -20,9 +21,9 @@ WORKDIR /workspace/axolotl

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
     fi

 RUN python scripts/unsloth_install.py | sh
@@ -16,7 +16,7 @@ ENV PYTHON_VERSION=$PYTHON_VERSION
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

 RUN apt-get update \
-    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config && rm -rf /var/lib/apt/lists/* \
+    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/* \
     && wget \
         https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
     && mkdir /root/.conda \
@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"
 ARG GITHUB_REF="main"

@@ -24,9 +25,9 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
     fi

 # So we can test the Docker image
@@ -52,7 +52,7 @@ export GPU_ARCHS="gfx90a"
 cd flash-attention
 export PYTHON_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
 patch "${PYTHON_SITE_PACKAGES}/torch/utils/hipify/hipify_python.py" hipify_patch.patch
-pip install --no-build-isolation .
+pip install .
 ```

 ### 6. Install Axolotl
@@ -63,7 +63,7 @@ Clone and install Axolotl:
 git clone https://github.com/axolotl-ai-cloud/axolotl
 cd axolotl
 pip install packaging ninja
-pip install --no-build-isolation -e .
+pip install -e .
 ```

 ### 7. Apply xformers Workaround
@@ -127,40 +127,34 @@ datasets:
 # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
 # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
 chat_template: tokenizer_default
-# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
+# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
 chat_template_jinja:
-# Key containing the messages (default: "messages")
+# The key in the data example that contains the messages. Default is "messages".
 field_messages: messages
-# Key for role in each message (default: "role")
+# The key in the message turn that contains the role. Default is "role".
 message_field_role: role
-# Key for content in each message (default: "content")
+# The key in the message turn that contains the content. Default is "content".
 message_field_content: content
-# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
+# Optional[Dict[str, List]]. Roles mapping for the messages.
 roles:
   user: ["human", "user"]
-  assistant: ["gpt", "assistant"]
+  assistant: ["gpt", "assistant", "ai"]
   system: ["system"]
-  tool: ["tool"]

-# IMPORTANT: The following fields determine which parts of the conversation to train on.
-# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
-# See examples at `docs/dataset-formats/conversation.qmd`
-# Note: If the below 4 fields are empty, defaults to training only on the last message.
+## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.

 # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
-roles_to_train: ["assistant"] # default
+roles_to_train: ["gpt", "assistant"]
 # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
 # - all: train on all EOS tokens
-# - turn (default): train on the EOS token at the end of each trainable turn
+# - turn: train on the EOS token at the end of each trainable turn
 # - last: train on the last EOS token in the conversation
 train_on_eos: last
 # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
 message_field_training: training
 # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
 # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
+# See example at `docs/dataset-formats/conversation.qmd`
 message_field_training_detail: train_detail

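Putting the documented fields together, a hypothetical dataset configuration might look like the following; only the keys come from the reference above, while the path and chosen values are illustrative placeholders:

```yaml
# Illustrative combination of the keys documented above; the dataset
# path and the chosen values are placeholders, not part of this diff.
chat_template: tokenizer_default
datasets:
  - path: ./data/conversations.jsonl   # placeholder path
    type: chat_template
    field_messages: messages
    message_field_role: role
    message_field_content: content
    roles_to_train: ["assistant"]
    train_on_eos: turn
```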
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200

-# Use batch flattening for speedups when not using sample_packing
-batch_flattening:
-
 # Passed through to transformers when loading the model when launched without accelerate
 # Use `sequential` when training w/ model parallelism to limit memory
 device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
 output_dir: ./completed-model

 # Whether to use torch.compile and which backend to use
-# setting to `auto` will enable torch compile when torch>=2.5.1
-torch_compile: # Optional[Union[Literal["auto"], bool]]
+torch_compile: # bool
 torch_compile_backend: # Optional[str]

 # Training hyperparameters
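A sketch contrasting the two spellings in this hunk; the semantics of `auto` are taken from the comment on the base branch's side, and the backend note is a general PyTorch fact rather than part of this diff:

```yaml
# Base branch: `auto` enables torch.compile only when torch>=2.5.1
# (per the comment removed above); a plain boolean also works.
torch_compile: auto
torch_compile_backend:   # optional; torch.compile defaults to the inductor backend

# Head branch accepts only a plain boolean:
# torch_compile: true
```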
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
 eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]

-profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
-# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
-# snapshots can be visualized @ https://pytorch.org/memory_viz
-
 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
 datasets:
   - path: ...
     type: chat_template
-    roles_to_train:
-    train_on_eos:
 ```

 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
 datasets:
   - path: ...
     type: chat_template
-    roles_to_train: ["assistant"] # default value
+    roles_to_train: ["assistant"]
 ```

 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
 datasets:
   - path: ...
     type: chat_template
+    roles_to_train: ["assistant"]
 ```

 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
 datasets:
   - path: ...
     type: chat_template
+    roles_to_train: ["assistant"]
 ```

 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
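Example 5's fine-grained control relies on the `message_field_training` and `message_field_training_detail` keys documented earlier in this compare. A hypothetical data sample, shown here as YAML; the keys follow that reference, while the message text and offsets are invented for illustration:

```yaml
# Hypothetical sample for fine-grained training control; content and
# offsets are invented, only the key names come from the config docs.
messages:
  - role: user
    content: "What is 2+2?"
  - role: assistant
    content: "Let me think about that."
    training: false            # message_field_training: skip this turn entirely
  - role: assistant
    content: "The answer is 4."
    train_detail:              # message_field_training_detail
      - begin_offset: 0        # start character index in content
        end_offset: 15         # end character index in content
        train: true
```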
@@ -71,7 +71,7 @@ Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/us

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 #### Remote Hosts
@@ -212,7 +212,7 @@ You will now be in the container. Next, perform an editable install of Axolotl:

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 ### Attach To Container
@@ -52,26 +52,6 @@ datasets:
     type: chat_template.argilla
 ```

-
-#### KTO
-
-```yaml
-rl: kto
-rl_beta: 0.5
-kto_desirable_weight: 0.2
-
-remove_unused_columns: false
-
-datasets:
-  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
-    type: llama3.ultra
-    split: train
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-```
-
 #### Using local dataset files
 ```yaml
 datasets:
@@ -1,10 +1,6 @@
 base_model: cerebras/btlm-3b-8k-base
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
@@ -1,7 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
@@ -24,7 +24,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-        "!pip install --no-build-isolation axolotl[deepspeed]"
+        "!pip install axolotl[deepspeed]"
     ]
   },
   {
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: true
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
@@ -1,6 +1,4 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true

 load_in_8bit: false
@@ -1,7 +1,4 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: true
 load_in_4bit: false
@@ -1,15 +1,10 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer

 load_in_8bit: false
 # enable 4bit for QLoRA
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: false
 load_in_4bit: false
@@ -1,10 +1,7 @@
|
|||||||
# use google/gemma-7b if you have access
|
# use google/gemma-7b if you have access
|
||||||
base_model: mhenrichsen/gemma-7b
|
base_model: mhenrichsen/gemma-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: google/gemma-2-9b
|
base_model: google/gemma-2-9b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: google/gemma-2-2b
|
base_model: google/gemma-2-2b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForSequenceClassification
|
model_type: AutoModelForSequenceClassification
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: EleutherAI/gpt-j-6b
|
base_model: EleutherAI/gpt-j-6b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: ai21labs/Jamba-v0.1
|
base_model: ai21labs/Jamba-v0.1
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: ai21labs/Jamba-v0.1
|
base_model: ai21labs/Jamba-v0.1
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
base_model: ai21labs/AI21-Jamba-1.5-Large
|
base_model: ai21labs/AI21-Jamba-1.5-Large
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: huggyllama/llama-7b
|
base_model: huggyllama/llama-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
- path: openaccess-ai-collective/jeopardy
|
- path: openaccess-ai-collective/jeopardy
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,13 +1,8 @@
|
|||||||
base_model: TheBloke/Llama-2-7B-GPTQ
|
base_model: TheBloke/Llama-2-7B-GPTQ
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
|
||||||
tokenizer_type: LlamaTokenizer
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
gptq: true
|
gptq: true
|
||||||
gptq_disable_exllama: true
|
gptq_disable_exllama: true
|
||||||
|
model_type: AutoModelForCausalLM
|
||||||
|
tokenizer_type: LlamaTokenizer
|
||||||
tokenizer_use_fast: true
|
tokenizer_use_fast: true
|
||||||
tokenizer_legacy: true
|
tokenizer_legacy: true
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
|
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
|
||||||
# optionally might have model_type or tokenizer_type or processor_type
|
|
||||||
processor_type: AutoProcessor
|
processor_type: AutoProcessor
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
strict: false
|
strict: false
|
||||||
|
|
||||||
# these 3 lines are needed for now to handle vision chat templates w images
|
# these 3 lines are needed for now to handle vision chat templates w images
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
plugins:
|
plugins:
|
||||||
- axolotl.integrations.liger.LigerPlugin
|
- axolotl.integrations.liger.LigerPlugin
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B-Instruct
|
base_model: NousResearch/Meta-Llama-3-8B-Instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
base_model: NousResearch/Llama-3.2-1B
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
|
||||||
load_in_4bit: false
|
|
||||||
strict: false
|
|
||||||
|
|
||||||
datasets:
|
|
||||||
- path: teknium/GPT4-LLM-Cleaned
|
|
||||||
type: alpaca
|
|
||||||
dataset_prepared_path: last_run_prepared
|
|
||||||
val_set_size: 0.1
|
|
||||||
output_dir: ./outputs/lora-out
|
|
||||||
|
|
||||||
adapter: lora
|
|
||||||
lora_model_dir:
|
|
||||||
|
|
||||||
sequence_len: 2048
|
|
||||||
sample_packing: true
|
|
||||||
eval_sample_packing: true
|
|
||||||
pad_to_sequence_len: true
|
|
||||||
|
|
||||||
lora_r: 16
|
|
||||||
lora_alpha: 32
|
|
||||||
lora_dropout: 0.05
|
|
||||||
lora_fan_in_fan_out:
|
|
||||||
lora_target_modules:
|
|
||||||
- gate_proj
|
|
||||||
- down_proj
|
|
||||||
- up_proj
|
|
||||||
- q_proj
|
|
||||||
- v_proj
|
|
||||||
- k_proj
|
|
||||||
- o_proj
|
|
||||||
|
|
||||||
wandb_project:
|
|
||||||
wandb_entity:
|
|
||||||
wandb_watch:
|
|
||||||
wandb_name:
|
|
||||||
wandb_log_model:
|
|
||||||
|
|
||||||
gradient_accumulation_steps: 2
|
|
||||||
micro_batch_size: 2
|
|
||||||
num_epochs: 1
|
|
||||||
optimizer: adamw_8bit
|
|
||||||
lr_scheduler: cosine
|
|
||||||
learning_rate: 0.0002
|
|
||||||
|
|
||||||
train_on_inputs: false
|
|
||||||
group_by_length: false
|
|
||||||
bf16: auto
|
|
||||||
fp16:
|
|
||||||
tf32: false
|
|
||||||
|
|
||||||
gradient_checkpointing: true
|
|
||||||
early_stopping_patience:
|
|
||||||
resume_from_checkpoint:
|
|
||||||
local_rank:
|
|
||||||
logging_steps: 1
|
|
||||||
xformers_attention:
|
|
||||||
flash_attention: true
|
|
||||||
|
|
||||||
loss_watchdog_threshold: 5.0
|
|
||||||
loss_watchdog_patience: 3
|
|
||||||
|
|
||||||
warmup_steps: 10
|
|
||||||
evals_per_epoch: 4
|
|
||||||
saves_per_epoch: 1
|
|
||||||
debug:
|
|
||||||
deepspeed:
|
|
||||||
weight_decay: 0.0
|
|
||||||
fsdp:
|
|
||||||
fsdp_config:
|
|
||||||
special_tokens:
|
|
||||||
pad_token: "<|end_of_text|>"
|
|
||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B
|
base_model: NousResearch/Meta-Llama-3-8B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,77 +0,0 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
|
||||||
load_in_4bit: true
|
|
||||||
strict: false
|
|
||||||
|
|
||||||
rl: kto
|
|
||||||
rl_beta: 0.5
|
|
||||||
kto_desirable_weight: 0.2
|
|
||||||
|
|
||||||
datasets:
|
|
||||||
- path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
|
|
||||||
type: llama3.ultra
|
|
||||||
split: train
|
|
||||||
dataset_prepared_path: last_run_prepared
|
|
||||||
val_set_size: 0.0
|
|
||||||
output_dir: ./outputs/qlora-out
|
|
||||||
|
|
||||||
remove_unused_columns: false
|
|
||||||
|
|
||||||
adapter: qlora
|
|
||||||
lora_model_dir:
|
|
||||||
|
|
||||||
sequence_len: 2048
|
|
||||||
sample_packing: false # not supported with kto
|
|
||||||
eval_sample_packing: false
|
|
||||||
pad_to_sequence_len: false
|
|
||||||
|
|
||||||
lora_r: 32
|
|
||||||
lora_alpha: 64
|
|
||||||
lora_dropout: 0.05
|
|
||||||
lora_target_linear: true
|
|
||||||
lora_fan_in_fan_out:
|
|
||||||
|
|
||||||
wandb_project:
|
|
||||||
wandb_entity:
|
|
||||||
wandb_watch:
|
|
||||||
wandb_name:
|
|
||||||
wandb_log_model:
|
|
||||||
|
|
||||||
gradient_accumulation_steps: 1
|
|
||||||
micro_batch_size: 2
|
|
||||||
num_epochs: 1
|
|
||||||
optimizer: adamw_8bit
|
|
||||||
lr_scheduler: cosine
|
|
||||||
learning_rate: 0.0002
|
|
||||||
|
|
||||||
train_on_inputs: false
|
|
||||||
group_by_length: false
|
|
||||||
bf16: auto
|
|
||||||
fp16:
|
|
||||||
tf32: true
|
|
||||||
|
|
||||||
gradient_checkpointing: true
|
|
||||||
gradient_checkpointing_kwargs:
|
|
||||||
use_reentrant: true
|
|
||||||
early_stopping_patience:
|
|
||||||
resume_from_checkpoint:
|
|
||||||
local_rank:
|
|
||||||
logging_steps: 1
|
|
||||||
xformers_attention:
|
|
||||||
flash_attention: true
|
|
||||||
|
|
||||||
warmup_steps: 20
|
|
||||||
evals_per_epoch: 4
|
|
||||||
eval_table_size:
|
|
||||||
eval_max_new_tokens: 128
|
|
||||||
saves_per_epoch: 1
|
|
||||||
debug:
|
|
||||||
deepspeed:
|
|
||||||
weight_decay: 0.0
|
|
||||||
fsdp:
|
|
||||||
fsdp_config:
|
|
||||||
special_tokens:
|
|
||||||
pad_token: "<|end_of_text|>"
|
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
@@ -24,6 +22,7 @@ pad_to_sequence_len: true
|
|||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
lora_target_linear: true
|
||||||
lora_fan_in_fan_out:
|
lora_fan_in_fan_out:
|
||||||
lora_target_modules:
|
lora_target_modules:
|
||||||
- gate_proj
|
- gate_proj
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
|
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: casperhansen/llama-3-70b-fp16
|
base_model: casperhansen/llama-3-70b-fp16
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
|
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B
|
base_model: NousResearch/Meta-Llama-3-8B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
examples/llava/lora-7b.yaml (new file)
@@ -0,0 +1,63 @@
+base_model: llava-hf/llava-1.5-7b-hf
+processor_type: AutoProcessor
+strict: false
+
+# these 3 lines are needed for now to handle vision chat templates w images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: llava
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: true
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
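The llava config above is the first of three new vision-LoRA examples added in this compare. As a minimal usage sketch, assuming the standard axolotl CLI entry point and the config path added by this diff (neither is shown being exercised here):

    # hypothetical launch command; assumes axolotl and accelerate are installed
    accelerate launch -m axolotl.cli.train examples/llava/lora-7b.yaml

Because the config sets skip_prepare_dataset: true, no separate preprocess step is expected; the dataset is prepared on the fly during training so the processor can handle images alongside the chat template.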
@@ -1,10 +1,7 @@
 base_model: state-spaces/mamba-2.8b
-# optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false

@@ -4,11 +4,8 @@
 #face problems with the special tokens.
 
 base_model: mistralai/Mistral-7B-Instruct-v0.2
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true

@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true

@@ -1,9 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true

@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true

@@ -1,9 +1,5 @@
 base_model: mosaicml/mpt-7b
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:

@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 strict: false

@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 load_in_4bit: false
 strict: false

@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false

@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3.5-mini-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false

@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true

@@ -1,9 +1,6 @@
 base_model: microsoft/phi-2
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false

@@ -1,11 +1,7 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 chat_template: phi_3
 
 load_in_8bit: false
examples/pixtral/lora-12b.yml (new file)
@@ -0,0 +1,65 @@
+base_model: mistral-community/pixtral-12b
+processor_type: AutoProcessor
+strict: false
+
+# these 3 lines are needed for now to handle vision chat templates w images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: pixtral
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: false # PixtralVisionModel does not support Flash Attention 2.0 yet
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  pad_token: <|end_of_text|>
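The pixtral example mirrors the llava one; the notable differences are the pixtral chat template and flash_attention: false, per the in-file comment that PixtralVisionModel does not support Flash Attention 2.0 yet. A minimal launch sketch under the same assumptions as above:

    # hypothetical launch command; entry point assumed, not shown in this diff
    accelerate launch -m axolotl.cli.train examples/pixtral/lora-12b.yml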
@@ -1,11 +1,7 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch* # prefer safetensors
-# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 gptq: false

@@ -1,7 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned

@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 trust_remote_code: true
 

@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 trust_remote_code: true
 

@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
examples/qwen2-vl/lora-7b.yaml (new file)
@@ -0,0 +1,63 @@
+base_model: Qwen/Qwen2-VL-7B-Instruct
+processor_type: AutoProcessor
+strict: false
+
+# these 3 lines are needed for now to handle vision chat templates w images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: qwen2_vl
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: true
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
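As with the other two vision examples, the qwen2-vl config restricts LoRA to the decoder layers via the lora_target_modules regex, which appears to leave the vision tower frozen. Assuming the same CLI entry point as above (an assumption, not shown in this diff):

    # hypothetical launch command
    accelerate launch -m axolotl.cli.train examples/qwen2-vl/lora-7b.yaml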
@@ -1,6 +1,4 @@
 base_model: Qwen/Qwen2.5-0.5B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 strict: false
 

@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen2-7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false

@@ -1,10 +1,6 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
-# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code:
 load_in_8bit: false
 datasets:

@@ -1,7 +1,4 @@
 base_model: replit/replit-code-v1-3b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 load_in_8bit: false
 datasets:

@@ -1,10 +1,6 @@
 base_model: stabilityai/stablelm-2-1_6b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
Some files were not shown because too many files have changed in this diff.