proof of concept for sage attention

2024-11-22 14:47:19 -05:00
272 changed files with 4918 additions and 11599 deletions
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -1,16 +1,6 @@
 name: ci-cd-base

 on:
-  push:
-    branches:
-      - "main"
-    paths:
-      - 'Dockerfile-base'
-      - '.github/workflows/base.yml'
-  pull_request:
-    paths:
-      - 'Dockerfile-base'
-      - '.github/workflows/base.yml'
  workflow_dispatch:

 jobs:
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,7 +1,6 @@
 name: lint
 on:
  # check on PRs, and manual triggers
-  merge_group:
  pull_request:
      paths:
       - '**.py'
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,6 +25,7 @@ jobs:
            python_version: "3.11"
            pytorch: 2.3.1
            axolotl_extras: mamba-ssm
+            is_latest: true
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
@@ -35,7 +36,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            axolotl_extras:
-            is_latest: true
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
@@ -92,6 +92,7 @@ jobs:
            python_version: "3.11"
            pytorch: 2.3.1
            axolotl_extras:
+            is_latest: true
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
@@ -102,7 +103,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            axolotl_extras:
-            is_latest: true
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -52,7 +52,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -13,13 +13,10 @@ jobs:
    permissions:
      contents: write
    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
      - name: Create release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh release create "$GITHUB_REF_NAME" --generate-notes
+        run: gh release create "$GITHUB_REF_NAME" # GITHUB_REF_NAME is the tag name in `on.push.tags` workflows
  pypi-publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
@@ -41,7 +38,7 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 install wheel packaging
-          pip3 install --no-build-isolation -e .
+          pip3 install -e .
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Extract tag name
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -23,15 +23,9 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 2
      matrix:
        python_version: ["3.10", "3.11"]
        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
@@ -44,11 +38,6 @@ jobs:
          python-version: ${{ matrix.python_version }}
          cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools wheel
-
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu
@@ -65,23 +54,12 @@ jobs:
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging
-          pip3 install --no-build-isolation -U -e .
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
+          pip3 install -U -e .
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest tests/patched/
+          pytest --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
@@ -129,7 +107,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,7 +1,6 @@
 name: Tests
 on:
  # check on push/merge to main, PRs, and manual triggers
-  merge_group:
  push:
    branches:
      - "main"
@@ -9,17 +8,11 @@ on:
      - '**.py'
      - 'requirements.txt'
      - '.github/workflows/*.yml'
-      - 'requirements-tests.txt'
-      - 'cicd/cicd.sh'
-      - 'cicd/Dockerfile.jinja'
  pull_request:
      paths:
       - '**.py'
       - 'requirements.txt'
       - '.github/workflows/*.yml'
-       - 'requirements-tests.txt'
-       - 'cicd/cicd.sh'
-       - 'cicd/Dockerfile.jinja'
  workflow_dispatch:

 # Cancel jobs on the same ref if a new one is triggered
@@ -46,30 +39,15 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 2
      matrix:
        python_version: ["3.10", "3.11"]
        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

-      - name: Restore HF cache
-        id: hf-cache-restore
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            /home/runner/.cache/huggingface/hub/datasets--*
-            /home/runner/.cache/huggingface/hub/models--*
-          key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
-
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
@@ -88,43 +66,22 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 show torch
-          pip3 install --no-build-isolation -U -e .
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
+          pip3 install -U -e .
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

-      - name: Save HF cache
-        id: hf-cache
-        uses: actions/cache/save@v4
-        with:
-          path: |
-            /home/runner/.cache/huggingface/hub/datasets--*
-            /home/runner/.cache/huggingface/hub/models--*
-          key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
-
  pytest-sdist:
    name: PyTest from Source Dist
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 1
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.4.1", "2.5.1"]
@@ -134,15 +91,6 @@ jobs:
      - name: Check out repository code
        uses: actions/checkout@v4

-      - name: Restore HF cache
-        id: hf-cache-restore
-        uses: actions/cache/restore@v4
-        with:
-          path: |
-            /home/runner/.cache/huggingface/hub/datasets--*
-            /home/runner/.cache/huggingface/hub/models--*
-          key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
-
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
@@ -152,7 +100,7 @@ jobs:
      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools setuptools_scm build wheel
+          pip3 install --upgrade packaging setuptools wheel

      - name: Install PyTorch
        run: |
@@ -161,38 +109,18 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 show torch
-          python -m build --no-isolation --sdist
-          pip3 install --no-build-isolation dist/axolotl*.tar.gz
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
+          python3 setup.py sdist
+          pip3 install dist/axolotl*.tar.gz
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

-      - name: Save HF cache
-        id: hf-cache
-        uses: actions/cache/save@v4
-        with:
-          path: |
-            /home/runner/.cache/huggingface/hub/datasets--*
-            /home/runner/.cache/huggingface/hub/models--*
-          key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
-
  docker-e2e-tests-1st:
    if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
    # this job needs to be run on self-hosted GPU runners...
@@ -207,7 +135,7 @@ jobs:
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
-            pytorch: 2.5.1
+            pytorch: 2.4.1
            num_gpus: 1
            axolotl_extras:
    steps:
@@ -220,7 +148,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
@@ -253,7 +181,7 @@ jobs:
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
-            pytorch: 2.4.1
+            pytorch: 2.5.1
            num_gpus: 1
            axolotl_extras:
    steps:
@@ -266,7 +194,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 **/axolotl.egg-info
 configs
 last_run_prepared/
-outputs
 .vscode
 _site/

--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
    hooks:
    - id: flake8
 -   repo: https://github.com/PyCQA/pylint
-    rev: v3.3.0
+    rev: v2.17.4
    hooks:
    - id: pylint
 -   repo: https://github.com/pre-commit/mirrors-mypy
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,5 +1,5 @@
 [MASTER]
-init-hook="from pylint.config import find_default_config_files; import sys; sys.path.append(next(find_default_config_files()).parent.as_posix())"
+init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))"

 [TYPECHECK]

@@ -12,4 +12,3 @@ generated-members=numpy.*, torch.*
 disable=missing-function-docstring, line-too-long, import-error,
    too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
    too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
-    too-many-positional-arguments, possibly-used-before-assignment
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,4 @@
 include requirements.txt
 include README.md
 include LICENSE
-include src/setuptools_axolotl_dynamic_dependencies.py
 recursive-include axolotl *.py
--- a/README.md
+++ b/README.md
@@ -10,13 +10,9 @@
    <img src="https://img.shields.io/github/license/axolotl-ai-cloud/axolotl.svg?color=blue" alt="GitHub License">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests.yml/badge.svg" alt="tests">
    <a href="https://github.com/axolotl-ai-cloud/axolotl/releases"><img src="https://img.shields.io/github/release/axolotl-ai-cloud/axolotl.svg" alt="Releases"></a>
-    <br/>
-    <a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors"><img src="https://img.shields.io/github/contributors-anon/axolotl-ai-cloud/axolotl?color=yellow&style=flat-square" alt="contributors" style="height: 20px;"></a>
    <img src="https://img.shields.io/github/stars/axolotl-ai-cloud/axolotl" alt="GitHub Repo stars">
-    <br/>
-    <a href="https://discord.com/invite/HhrNrHJPRb"><img src="https://img.shields.io/badge/discord-7289da.svg?style=flat-square&logo=discord" alt="discord" style="height: 20px;"></a>
-    <a href="https://twitter.com/axolotl_ai"><img src="https://img.shields.io/twitter/follow/axolotl_ai?style=social" alt="twitter" style="height: 20px;"></a>
-    <br/>
+</p>
+<p align="center">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests-nightly.yml/badge.svg" alt="tests-nightly">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/multi-gpu-e2e.yml/badge.svg" alt="multigpu-semi-weekly tests">
 </p>
@@ -45,13 +41,9 @@ Features:
 ## Table of Contents
 - [Axolotl](#axolotl)
  - [Table of Contents](#table-of-contents)
-  - [Quickstart ⚡](#quickstart-)
-    - [Edge Builds](#edge-builds-)
-    - [Axolotl CLI Usage](#axolotl-cli-usage)
-  - [Badge ❤🏷️](#badge-️)
-  - [Contributing 🤝](#contributing-)
-  - [Sponsors 🤝❤](#sponsors-)
  - [Axolotl supports](#axolotl-supports)
+  - [Quickstart ⚡](#quickstart-)
+    - [Usage](#usage)
  - [Advanced Setup](#advanced-setup)
    - [Environment](#environment)
      - [Docker](#docker)
@@ -83,6 +75,14 @@ Features:
    - [Tokenization Mismatch b/w Inference \& Training](#tokenization-mismatch-bw-inference--training)
  - [Debugging Axolotl](#debugging-axolotl)
  - [Need help? 🙋](#need-help-)
+  - [Badge ❤🏷️](#badge-️)
+  - [Community Showcase](#community-showcase)
+  - [Contributing 🤝](#contributing-)
+  - [Sponsors 🤝❤](#sponsors-)
+      - [💎 Diamond Sponsors - Contact directly](#-diamond-sponsors---contact-directly)
+      - [🥇 Gold Sponsors - $5000/mo](#-gold-sponsors---5000mo)
+      - [🥈 Silver Sponsors - $1000/mo](#-silver-sponsors---1000mo)
+      - [🥉 Bronze Sponsors - $500/mo](#-bronze-sponsors---500mo)

 </td>
 <td>
@@ -105,148 +105,6 @@ Features:
 </tr>
 </table>

-## Quickstart ⚡
-
-Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
-
-**Requirements**: *Nvidia* GPU (Ampere architecture or newer for `bf16` and Flash Attention) or *AMD* GPU, Python >=3.10 and PyTorch >=2.3.1.
-
-```bash
-pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
-
-# download examples and optionally deepspeed configs to the local path
-axolotl fetch examples
-axolotl fetch deepspeed_configs  # OPTIONAL
-
-# finetune using lora
-axolotl train examples/llama-3/lora-1b.yml
-```
-
-### Edge Builds 🏎️
-
-If you're looking for the latest features and updates between releases, you'll need to install
-from source.
-
-```bash
-git clone https://github.com/axolotl-ai-cloud/axolotl.git
-cd axolotl
-pip3 install packaging ninja
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
-```
-
-### Axolotl CLI Usage
-We now support a new, more streamlined CLI using [click](https://click.palletsprojects.com/en/stable/).
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" axolotl preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-axolotl train examples/llama-3/lora-1b.yml
-
-# inference
-axolotl inference examples/llama-3/lora-1b.yml \
-    --lora-model-dir="./outputs/lora-out"
-
-# gradio
-axolotl inference examples/llama-3/lora-1b.yml \
-    --lora-model-dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-axolotl train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-We've also added a new command for fetching `examples` and `deepspeed_configs` to your
-local machine. This will come in handy when installing `axolotl` from PyPI.
-
-```bash
-# Fetch example YAML files (stores in "examples/" folder)
-axolotl fetch examples
-
-# Fetch deepspeed config files (stores in "deepspeed_configs/" folder)
-axolotl fetch deepspeed_configs
-
-# Optionally, specify a destination folder
-axolotl fetch examples --dest path/to/folder
-```
-
-### Legacy Usage
-<details>
-
-<summary>Click to Expand</summary>
-
-While the Axolotl CLI is the preferred method for interacting with axolotl, we
-still support the legacy `-m axolotl.cli.*` usage.
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" python -m axolotl.cli.preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-accelerate launch -m axolotl.cli.train examples/llama-3/lora-1b.yml
-
-# inference
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-    --lora_model_dir="./outputs/lora-out"
-
-# gradio
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-    --lora_model_dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-</details>
-
-## Badge ❤🏷️
-
-Building something cool with Axolotl? Consider adding a badge to your model card.
-
-```markdown
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-```
-
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-
-## Sponsors 🤝❤
-
-If you love axolotl, consider sponsoring the project by reaching out directly to [wing@axolotl.ai](mailto:wing@axolotl.ai).
-
---
-
- [Modal](https://modal.com/) Modal lets you run data/AI jobs in the cloud, by just writing a few lines of Python. Customers use Modal to deploy Gen AI models at large scale, fine-tune LLM models, run protein folding simulations, and much more.
-
---
-
-## Contributing 🤝
-
-Please read the [contributing guide](./.github/CONTRIBUTING.md)
-
-Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
-
-PRs are **greatly welcome**!
-
-Please run the quickstart instructions followed by the below to setup env:
-```bash
-pip3 install -r requirements-dev.txt -r requirements-tests.txt
-pre-commit install
-
-# test
-pytest tests/
-
-# optional: run against all files
-pre-commit run --all-files
-```
-
-Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
-
-<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
-  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
-</a>
-
 ## Axolotl supports

 |             | fp16/fp32 | lora | qlora | gptq | gptq w/flash attn | flash attn | xformers attn |
@@ -272,6 +130,41 @@ Thanks to all of our contributors to date. Help drive open source AI progress fo
 ❌: not supported
 ❓: untested

+## Quickstart ⚡
+
+Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
+
+**Requirements**: Nvidia GPU (Ampere architecture or newer for `bf16` and Flash Attention), Python >=3.10 and PyTorch >=2.3.1.
+
+```bash
+git clone https://github.com/axolotl-ai-cloud/axolotl
+cd axolotl
+
+pip3 install packaging ninja
+pip3 install -e '.[flash-attn,deepspeed]'
+```
+
+### Usage
+```bash
+# preprocess datasets - optional but recommended
+CUDA_VISIBLE_DEVICES="" python -m axolotl.cli.preprocess examples/openllama-3b/lora.yml
+
+# finetune lora
+accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
+
+# inference
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./outputs/lora-out"
+
+# gradio
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./outputs/lora-out" --gradio
+
+# remote yaml files - the yaml config can be hosted on a public URL
+# Note: the yaml config must directly link to the **raw** yaml
+accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/openllama-3b/lora.yml
+```
+
 ## Advanced Setup

 ### Environment
@@ -320,7 +213,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
  3. Install Axolotl along with python dependencies
        ```bash
        pip3 install packaging
-        pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+        pip3 install -e '.[flash-attn,deepspeed]'
        ```
  4. (Optional) Login to Huggingface to use gated models/datasets.
        ```bash
@@ -399,7 +292,7 @@ Please use WSL or Docker!

 Use the below instead of the install method in QuickStart.
 ```
-pip3 install --no-build-isolation -e '.'
+pip3 install -e '.'
 ```
 More info: [mac.md](/docs/mac.qmd)

@@ -789,6 +682,86 @@ See [this debugging guide](docs/debugging.qmd) for tips on debugging Axolotl, al

 ## Need help? 🙋

-Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.
+Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.

-Need dedicated support? Please contact us at [✉️wing@axolotl.ai](ailto:wing@axolotl.ai) for dedicated support options.
+Need dedicated support? Please contact us at [✉️wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org) for dedicated support options.
+
+## Badge ❤🏷️
+
+Building something cool with Axolotl? Consider adding a badge to your model card.
+
+```markdown
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+```
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+
+## Community Showcase
+
+Check out some of the projects and models that have been built using Axolotl! Have a model you'd like to add to our Community Showcase? Open a PR with your model.
+
+Open Access AI Collective
+- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b-fixed)
+- [Manticore 13b](https://huggingface.co/openaccess-ai-collective/manticore-13b)
+- [Hippogriff 30b](https://huggingface.co/openaccess-ai-collective/hippogriff-30b-chat)
+
+PocketDoc Labs
+- [Dan's PersonalityEngine 13b LoRA](https://huggingface.co/PocketDoc/Dans-PersonalityEngine-13b-LoRA)
+
+## Contributing 🤝
+
+Please read the [contributing guide](./.github/CONTRIBUTING.md)
+
+Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
+
+PRs are **greatly welcome**!
+
+Please run the quickstart instructions, followed by the commands below, to set up your environment:
+```bash
+pip3 install -r requirements-dev.txt -r requirements-tests.txt
+pre-commit install
+
+# test
+pytest tests/
+
+# optional: run against all files
+pre-commit run --all-files
+```
+
+Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
+
+<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
+</a>
+
+## Sponsors 🤝❤
+
+OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
+[NanoCode012](https://github.com/NanoCode012), [tmm1](https://github.com/tmm1),
+[mhenrichsen](https://github.com/mhenrichsen), [casper-hansen](https://github.com/casper-hansen),
+[hamelsmu](https://github.com/hamelsmu) and many more who help us accelerate forward by fixing bugs, answering
+community questions and implementing new features. Axolotl needs donations from sponsors for the compute needed to
+run our unit & integration tests, troubleshooting community issues, and providing bounties. If you love axolotl,
+consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsors/OpenAccess-AI-Collective),
+[Ko-fi](https://ko-fi.com/axolotl_ai) or reach out directly to
+[wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org).
+
+---
+
+#### 💎 Diamond Sponsors - [Contact directly](mailto:wing@openaccessaicollective.org)
+
+---
+
+#### 🥇 Gold Sponsors - $5000/mo
+
+---
+
+#### 🥈 Silver Sponsors - $1000/mo
+
+---
+
+#### 🥉 Bronze Sponsors - $500/mo
+
+ - [JarvisLabs.ai](https://jarvislabs.ai)
+
+---
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -4,11 +4,11 @@ ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
 ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
 ENV CUDA="{{ CUDA }}"
+ENV BNB_CUDA_VERSION="{{ CUDA }}"
 ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
 ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
 ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
-ENV HF_HOME="{{ HF_HOME }}"

 RUN apt-get update && \
    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
@@ -32,14 +32,11 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
    fi

 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
    fi

-RUN python scripts/unsloth_install.py | sh
-RUN python scripts/cutcrossentropy_install.py | sh
-
 # So we can test the Docker image
 RUN pip install -r requirements-dev.txt -r requirements-tests.txt

--- a/cicd/cicd.sh
+++ b/cicd/cicd.sh
@@ -1,10 +1,6 @@
 #!/bin/bash
 set -e

-python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
-
-pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
-# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
-pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
+pytest -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/
+pytest -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/
+pytest --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
--- a/cicd/multigpu.py
+++ b/cicd/multigpu.py
@@ -28,7 +28,6 @@ df_args = {
    "CUDA": os.environ.get("CUDA", "121"),
    "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
    "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
-    "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }

 dockerfile_contents = df_template.render(**df_args)
@@ -49,12 +48,6 @@ cicd_image = (

 app = App("Axolotl CI/CD", secrets=[])

-hf_cache_volume = modal.Volume.from_name(
-    "axolotl-ci-hf-hub-cache", create_if_missing=True
-)
-VOLUME_CONFIG = {
-    "/workspace/data/huggingface-cache/hub": hf_cache_volume,
-}

 N_GPUS = int(os.environ.get("N_GPUS", 2))
 GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
@@ -74,7 +67,6 @@ def run_cmd(cmd: str, run_folder: str):
    timeout=60 * 60,
    cpu=8.0,
    memory=131072 * N_GPUS,
-    volumes=VOLUME_CONFIG,
 )
 def cicd_pytest():
    run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")
--- a/cicd/tests.py
+++ b/cicd/tests.py
@@ -29,7 +29,6 @@ df_args = {
    "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
    "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
    "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
-    "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }

 dockerfile_contents = df_template.render(**df_args)
@@ -41,7 +40,6 @@ with open(pathlib.Path(temp_dir) / "Dockerfile", "w", encoding="utf-8") as f:
 cicd_image = (
    Image.from_dockerfile(
        pathlib.Path(temp_dir) / "Dockerfile",
-        context_mount=None,
        force_build=True,
        gpu="A10G",
    )
@@ -51,15 +49,9 @@ cicd_image = (

 app = App("Axolotl CI/CD", secrets=[])

-hf_cache_volume = modal.Volume.from_name(
-    "axolotl-ci-hf-hub-cache", create_if_missing=True
-)
-VOLUME_CONFIG = {
-    "/workspace/data/huggingface-cache/hub": hf_cache_volume,
-}

 N_GPUS = int(os.environ.get("N_GPUS", 1))
-GPU_CONFIG = modal.gpu.L40S(count=N_GPUS)
+GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)


 def run_cmd(cmd: str, run_folder: str):
@@ -76,7 +68,6 @@ def run_cmd(cmd: str, run_folder: str):
    timeout=60 * 60,
    cpu=8.0,
    memory=131072,
-    volumes=VOLUME_CONFIG,
 )
 def cicd_pytest():
    run_cmd("./cicd/cicd.sh", "/workspace/axolotl")
--- a/deepspeed_configs/zero1_torch_compile.json
+++ b/deepspeed_configs/zero1_torch_compile.json
@@ -1,27 +0,0 @@
-{
-  "zero_optimization": {
-    "stage": 1,
-    "overlap_comm": true
-  },
-  "bf16": {
-    "enabled": "auto"
-  },
-  "fp16": {
-    "enabled": "auto",
-    "auto_cast": false,
-    "loss_scale": 0,
-    "initial_scale_power": 32,
-    "loss_scale_window": 1000,
-    "hysteresis": 2,
-    "min_loss_scale": 1
-  },
-  "compile": {
-    "disable": false,
-    "backend": "inductor"
-  },
-  "gradient_accumulation_steps": "auto",
-  "gradient_clipping": "auto",
-  "train_batch_size": "auto",
-  "train_micro_batch_size_per_gpu": "auto",
-  "wall_clock_breakdown": false
-}
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"

 ENV PYTORCH_VERSION=$PYTORCH_VERSION
@@ -20,14 +21,11 @@ WORKDIR /workspace/axolotl

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
    fi

-RUN python scripts/unsloth_install.py | sh
-RUN python scripts/cutcrossentropy_install.py | sh
-
 # So we can test the Docker image
 RUN pip install pytest

--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -16,7 +16,7 @@ ENV PYTHON_VERSION=$PYTHON_VERSION
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

 RUN apt-get update \
-    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config && rm -rf /var/lib/apt/lists/* \
+    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/* \
    && wget \
    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && mkdir /root/.conda \
@@ -29,9 +29,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
 WORKDIR /workspace

 RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
-    python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \
-    python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
-    python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"
+    python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA

 RUN git lfs install --skip-repo && \
    pip3 install awscli && \
--- a/docker/Dockerfile-cloud
+++ b/docker/Dockerfile-cloud
@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

--- a/docker/Dockerfile-cloud-no-tmux
+++ b/docker/Dockerfile-cloud-no-tmux
@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

--- a/docker/Dockerfile-tests
+++ b/docker/Dockerfile-tests
@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"
 ARG GITHUB_REF="main"

@@ -24,9 +25,9 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
    fi

 # So we can test the Docker image
--- a/docs/amd_hpc.qmd
+++ b/docs/amd_hpc.qmd
@@ -52,7 +52,7 @@ export GPU_ARCHS="gfx90a"
 cd flash-attention
 export PYTHON_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
 patch "${PYTHON_SITE_PACKAGES}/torch/utils/hipify/hipify_python.py" hipify_patch.patch
-pip install --no-build-isolation .
+pip install .
 ```

 ### 6. Install Axolotl
@@ -63,7 +63,7 @@ Clone and install Axolotl:
 git clone https://github.com/axolotl-ai-cloud/axolotl
 cd axolotl
 pip install packaging ninja
-pip install --no-build-isolation -e .
+pip install -e .
 ```

 ### 7. Apply xformers Workaround
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -127,40 +127,34 @@ datasets:
    # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
    # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
    chat_template: tokenizer_default
-
-    # Custom jinja chat template. Used only if `chat_template: jinja` or empty.
+    # Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
    chat_template_jinja:
-
-    # Key containing the messages (default: "messages")
+    # The key in the data example that contains the messages. Default is "messages".
    field_messages: messages
-    # Key for role in each message (default: "role")
+    # The key in the message turn that contains the role. Default is "role".
    message_field_role: role
-    # Key for content in each message (default:  "content")
+    # The key in the message turn that contains the content. Default is "content".
    message_field_content: content
-
-    # Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
+    # Optional[Dict[str, List]]. Roles mapping for the messages.
    roles:
      user: ["human", "user"]
-      assistant: ["gpt", "assistant"]
+      assistant: ["gpt", "assistant", "ai"]
      system: ["system"]
-      tool: ["tool"]

-    # IMPORTANT: The following fields determine which parts of the conversation to train on.
-    # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
-    # See examples at `docs/dataset-formats/conversation.qmd`
-    # Note: If the below 4 fields are empty, defaults to training only on the last message.
+    ## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.

    # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
-    roles_to_train: ["assistant"]  # default
+    roles_to_train: ["gpt", "assistant"]
    # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
    # - all: train on all EOS tokens
-    # - turn (default): train on the EOS token at the end of each trainable turn
+    # - turn: train on the EOS token at the end of each trainable turn
    # - last: train on the last EOS token in the conversation
    train_on_eos: last
    # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
    message_field_training: training
    # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
    # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
+    # See example at `docs/dataset-formats/conversation.qmd`
    message_field_training_detail: train_detail


@@ -168,9 +162,6 @@ datasets:
 # The same applies to the `test_datasets` option and the `pretraining_dataset` option. Default is true.
 shuffle_merged_datasets: true

-Deduplicates datasets and test_datasets with identical entries.
-dataset_exact_deduplication: true
-
 # A list of one or more datasets to eval the model with.
 # You can use either test_datasets, or val_set_size, but not both.
 test_datasets:
@@ -245,9 +236,6 @@ sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200

-# Use batch flattening for speedups when not using sample_packing
-batch_flattening:
-
 # Passed through to transformers when loading the model when launched without accelerate
 # Use `sequential` when training w/ model parallelism to limit memory
 device_map:
@@ -340,8 +328,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
 output_dir: ./completed-model

 # Whether to use torch.compile and which backend to use
-# setting to `auto` will enable torch compile when torch>=2.5.1
-torch_compile:  # Optional[Union[Literal["auto"], bool]]
+torch_compile:  # bool
 torch_compile_backend:  # Optional[str]

 # Training hyperparameters
@@ -373,10 +360,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
 eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]

-profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
-                # see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
-                # snapshots can be visualized @ https://pytorch.org/memory_viz
-
 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

@@ -423,7 +406,7 @@ lr_div_factor: # Learning rate div factor
 # - adamw_torch_fused
 # - adamw_torch_xla
 # - adamw_apex_fused
-# - adopt_adamw (an EXPERIMENTAL optimizer, only for torch version >= 2.5.1)
+# - adopt_adamw (only for torch version >= 2.5.1)
 # - adafactor
 # - adamw_anyprecision
 # - sgd
--- a/docs/dataset-formats/conversation.qmd
+++ b/docs/dataset-formats/conversation.qmd
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
 datasets:
  - path: ...
    type: chat_template
-    roles_to_train:
-    train_on_eos:
 ```

 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
 datasets:
  - path: ...
    type: chat_template
-    roles_to_train: ["assistant"]  # default value
+    roles_to_train: ["assistant"]
 ```

 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
 datasets:
  - path: ...
    type: chat_template
+    roles_to_train: ["assistant"]
 ```

 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
 datasets:
  - path: ...
    type: chat_template
+    roles_to_train: ["assistant"]
 ```

 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
--- a/docs/dataset-formats/pretraining.qmd
+++ b/docs/dataset-formats/pretraining.qmd
@@ -19,14 +19,7 @@ For pretraining, there is no prompt template or roles.  The only required field
 Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:

 ```{.yaml filename="config.yaml"}
-pretraining_dataset:
-  - name:
-    path:
-    split:
-    text_column: # column in dataset with the data, usually `text`
-    type: pretrain
-    trust_remote_code:
-    skip: # number of rows of data to skip over from the beginning
+pretraining_dataset: # hf path only
 ...
 ```

--- a/docs/debugging.qmd
+++ b/docs/debugging.qmd
@@ -71,7 +71,7 @@ Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/us

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 #### Remote Hosts
@@ -212,7 +212,7 @@ You will now be in the container.  Next, perform an editable install of Axolotl:

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 ### Attach To Container
--- a/docs/rlhf.qmd
+++ b/docs/rlhf.qmd
@@ -29,7 +29,7 @@ datasets:
    type: chatml.intel
  - path: argilla/ultrafeedback-binarized-preferences
    split: train
-    type: chatml
+    type: chatml.argilla
 ```

 #### IPO
@@ -52,26 +52,6 @@ datasets:
    type: chat_template.argilla
 ```

-
-#### KTO
-
-```yaml
-rl: kto
-rl_beta: 0.5
-kto_desirable_weight: 0.2
-
-remove_unused_columns: false
-
-datasets:
-  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
-    type: llama3.ultra
-    split: train
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-```
-
 #### Using local dataset files
 ```yaml
 datasets:
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -1,10 +1,6 @@
 base_model: cerebras/btlm-3b-8k-base
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -1,7 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -24,7 +24,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "!pip install --no-build-isolation axolotl[deepspeed]"
+    "!pip install axolotl[deepspeed]"
   ]
  },
  {
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: true
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -1,6 +1,4 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -1,7 +1,4 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -1,15 +1,10 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
-
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer

 load_in_8bit: false
 # enable 4bit for QLoRA
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -1,10 +1,7 @@
 # use google/gemma-7b if you have access
 base_model: mhenrichsen/gemma-7b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-9b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-2b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForSequenceClassification
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -1,7 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -1,6 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -1,8 +1,5 @@
 base_model: ai21labs/AI21-Jamba-1.5-Large
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_4bit: true
 strict: false
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -1,10 +1,6 @@
 base_model: huggyllama/llama-7b
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 datasets:
  - path: openaccess-ai-collective/jeopardy
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -1,13 +1,8 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 gptq: true
 gptq_disable_exllama: true
-
+model_type: AutoModelForCausalLM
+tokenizer_type: LlamaTokenizer
 tokenizer_use_fast: true
 tokenizer_legacy: true
 load_in_8bit: false
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3-vision/lora-11b.yaml
+++ b/examples/llama-3-vision/lora-11b.yaml
@@ -1,9 +1,5 @@
 base_model: alpindale/Llama-3.2-11B-Vision-Instruct
-# optionally might have model_type or tokenizer_type or processor_type
 processor_type: AutoProcessor
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 strict: false

 # these 3 lines are needed for now to handle vision chat templates w images
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 plugins:
  - axolotl.integrations.liger.LigerPlugin
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -1,98 +0,0 @@
-base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
-model_type: LlamaForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: true
-load_in_4bit: false
-strict: false
-
-chat_template: llama3
-rl: dpo
-datasets:
-  - path: fozziethebeat/alpaca_messages_2k_dpo_test
-    type: chat_template.default
-    field_messages: conversation
-    field_chosen: chosen
-    field_rejected: rejected
-    message_field_role: role
-    message_field_content: content
-    roles:
-      system:
-        - system
-      user:
-        - user
-      assistant:
-        - assistant
-  - path: fozziethebeat/alpaca_messages_2k_dpo_test
-    type: chat_template.default
-    field_messages: conversation
-    field_chosen: chosen
-    field_rejected: rejected
-    message_field_role: role
-    message_field_content: content
-    roles:
-      system:
-        - system
-      user:
-        - user
-      assistant:
-        - assistant
-
-dataset_exact_deduplication: true
-dataset_prepared_path:
-val_set_size: 0
-output_dir: ./outputs/lora-out
-
-sequence_len: 4096
-sample_packing: false
-pad_to_sequence_len: true
-
-adapter: lora
-lora_model_dir:
-lora_r: 32
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_linear: true
-lora_fan_in_fan_out:
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 4
-micro_batch_size: 2
-num_epochs: 4
-optimizer: adamw_bnb_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-s2_attention:
-
-warmup_steps: 10
-evals_per_epoch: 4
-eval_table_size:
-eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -1,79 +0,0 @@
-base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
-model_type: LlamaForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: true
-load_in_4bit: false
-strict: false
-
-datasets:
-  - path: mhenrichsen/alpaca_2k_test
-    type: alpaca
-  - path: mhenrichsen/alpaca_2k_test
-    type: alpaca
-dataset_prepared_path:
-val_set_size: 0.0
-output_dir: ./outputs/lora-out
-
-dataset_exact_deduplication: true
-test_value: true
-
-sequence_len: 4096
-sample_packing: true
-eval_sample_packing: false
-pad_to_sequence_len: true
-
-adapter: lora
-lora_model_dir:
-lora_r: 32
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_linear: true
-lora_fan_in_fan_out:
-lora_modules_to_save:
-  - embed_tokens
-  - lm_head
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 4
-micro_batch_size: 2
-num_epochs: 4
-optimizer: adamw_bnb_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-s2_attention:
-
-warmup_steps: 10
-evals_per_epoch: 4
-eval_table_size:
-eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-   pad_token: <|end_of_text|>
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -1,76 +0,0 @@
-base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: false
-load_in_4bit: false
-strict: false
-
-datasets:
-  - path: teknium/GPT4-LLM-Cleaned
-    type: alpaca
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.1
-output_dir: ./outputs/lora-out
-
-adapter: lora
-lora_model_dir:
-
-sequence_len: 2048
-sample_packing: true
-eval_sample_packing: true
-pad_to_sequence_len: true
-
-lora_r: 16
-lora_alpha: 32
-lora_dropout: 0.05
-lora_fan_in_fan_out:
-lora_target_modules:
-  - gate_proj
-  - down_proj
-  - up_proj
-  - q_proj
-  - v_proj
-  - k_proj
-  - o_proj
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 2
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-
-loss_watchdog_threshold: 5.0
-loss_watchdog_patience: 3
-
-warmup_steps: 10
-evals_per_epoch: 4
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-  pad_token: "<|end_of_text|>"
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/qlora-1b-kto.yaml
+++ b/examples/llama-3/qlora-1b-kto.yaml
@@ -1,77 +0,0 @@
-base_model: meta-llama/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: false
-load_in_4bit: true
-strict: false
-
-rl: kto
-rl_beta: 0.5
-kto_desirable_weight: 0.2
-
-datasets:
-  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
-    type: llama3.ultra
-    split: train
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.0
-output_dir: ./outputs/qlora-out
-
-remove_unused_columns: false
-
-adapter: qlora
-lora_model_dir:
-
-sequence_len: 2048
-sample_packing: false  # not supported with kto
-eval_sample_packing: false
-pad_to_sequence_len: false
-
-lora_r: 32
-lora_alpha: 64
-lora_dropout: 0.05
-lora_target_linear: true
-lora_fan_in_fan_out:
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 1
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: true
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-
-warmup_steps: 20
-evals_per_epoch: 4
-eval_table_size:
-eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-  pad_token: "<|end_of_text|>"
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -1,6 +1,4 @@
-base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
+base_model: meta-llama/Llama-3.2-1B

 load_in_8bit: false
 load_in_4bit: true
@@ -24,6 +22,7 @@ pad_to_sequence_len: true
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
+lora_target_linear: true
 lora_fan_in_fan_out:
 lora_target_modules:
  - gate_proj
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -1,8 +1,5 @@
 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_4bit: true
 strict: false
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -1,9 +1,6 @@
 base_model: casperhansen/llama-3-70b-fp16
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer  # PreTrainedTokenizerFast
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -1,10 +1,7 @@
 base_model: state-spaces/mamba-2.8b
-# optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -4,11 +4,8 @@
 #face problems with the special tokens.

 base_model: mistralai/Mistral-7B-Instruct-v0.2
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -1,9 +1,5 @@
 base_model: mosaicml/mpt-7b
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true  # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3.5-mini-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-2
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -1,11 +1,7 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 chat_template: phi_3

 load_in_8bit: false
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -1,11 +1,7 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch*  # prefer safetensors
-# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 gptq: false
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 datasets:
  - path: teknium/GPT4-LLM-Cleaned
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 trust_remote_code: true

--- a/Show More
+++ b/Show More