feat: add support for qwen25 vl for multimodal

revert seq len to 8192
settings
2025-02-18 12:42:29 +07:00 · 2024-12-08 22:30:20 -05:00 · 2024-12-08 22:22:18 -05:00 · 2024-12-06 16:06:57 -05:00 · 2024-12-06 15:41:09 -05:00 · 2024-12-06 15:27:18 -05:00
188 changed files with 1628 additions and 5272 deletions
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -13,13 +13,10 @@ jobs:
    permissions:
      contents: write
    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
      - name: Create release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: gh release create "$GITHUB_REF_NAME" --generate-notes
+        run: gh release create "$GITHUB_REF_NAME" # GITHUB_REF_NAME is the tag name in `on.push.tags` workflows
  pypi-publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
@@ -41,7 +38,7 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 install wheel packaging
-          pip3 install --no-build-isolation -e .
+          pip3 install -e .
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Extract tag name
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -23,15 +23,9 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 2
      matrix:
        python_version: ["3.10", "3.11"]
        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
@@ -44,11 +38,6 @@ jobs:
          python-version: ${{ matrix.python_version }}
          cache: 'pip' # caching pip dependencies

-      - name: upgrade pip
-        run: |
-          pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools wheel
-
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu
@@ -65,23 +54,13 @@ jobs:
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging
-          pip3 install --no-build-isolation -U -e .
-          python scripts/unsloth_install.py | sh
+          pip3 install -U -e .
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest tests/patched/
+          pytest --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -10,7 +10,6 @@ on:
      - '.github/workflows/*.yml'
      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
-      - 'cicd/Dockerfile.jinja'
  pull_request:
      paths:
       - '**.py'
@@ -18,7 +17,6 @@ on:
       - '.github/workflows/*.yml'
       - 'requirements-tests.txt'
       - 'cicd/cicd.sh'
-       - 'cicd/Dockerfile.jinja'
  workflow_dispatch:

 # Cancel jobs on the same ref if a new one is triggered
@@ -45,15 +43,9 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 2
      matrix:
        python_version: ["3.10", "3.11"]
        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
-        exclude:
-          - python_version: "3.10"
-            pytorch_version: "2.4.1"
-          - python_version: "3.10"
-            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
@@ -78,23 +70,14 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 show torch
-          pip3 install --no-build-isolation -U -e .
+          pip3 install -U -e .
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
@@ -105,7 +88,6 @@ jobs:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
-      max-parallel: 1
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.4.1", "2.5.1"]
@@ -124,7 +106,7 @@ jobs:
      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
-          pip3 install --upgrade packaging setuptools setuptools_scm build wheel
+          pip3 install --upgrade packaging setuptools wheel

      - name: Install PyTorch
        run: |
@@ -133,24 +115,13 @@ jobs:
      - name: Install dependencies
        run: |
          pip3 show torch
-          python -m build --no-isolation --sdist
-          pip3 install --no-build-isolation dist/axolotl*.tar.gz
-          python scripts/unsloth_install.py | sh
-          python scripts/cutcrossentropy_install.py | sh
+          python3 setup.py sdist
+          pip3 install dist/axolotl*.tar.gz
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

-      - name: Make sure PyTorch version wasn't clobbered
-        run: |
-          python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
-
-      - name: Ensure axolotl CLI was installed
-        run: |
-          axolotl --help
-
      - name: Run tests
        run: |
-          pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
-          pytest -v tests/patched/
+          pytest -n8 --ignore=tests/e2e/ tests/

      - name: cleanup pip cache
        run: |
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 **/axolotl.egg-info
 configs
 last_run_prepared/
-outputs
 .vscode
 _site/

--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,4 @@
 include requirements.txt
 include README.md
 include LICENSE
-include src/setuptools_axolotl_dynamic_dependencies.py
 recursive-include axolotl *.py
--- a/README.md
+++ b/README.md
@@ -10,13 +10,9 @@
    <img src="https://img.shields.io/github/license/axolotl-ai-cloud/axolotl.svg?color=blue" alt="GitHub License">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests.yml/badge.svg" alt="tests">
    <a href="https://github.com/axolotl-ai-cloud/axolotl/releases"><img src="https://img.shields.io/github/release/axolotl-ai-cloud/axolotl.svg" alt="Releases"></a>
-    <br/>
-    <a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors"><img src="https://img.shields.io/github/contributors-anon/axolotl-ai-cloud/axolotl?color=yellow&style=flat-square" alt="contributors" style="height: 20px;"></a>
    <img src="https://img.shields.io/github/stars/axolotl-ai-cloud/axolotl" alt="GitHub Repo stars">
-    <br/>
-    <a href="https://discord.com/invite/HhrNrHJPRb"><img src="https://img.shields.io/badge/discord-7289da.svg?style=flat-square&logo=discord" alt="discord" style="height: 20px;"></a>
-    <a href="https://twitter.com/axolotl_ai"><img src="https://img.shields.io/twitter/follow/axolotl_ai?style=social" alt="twitter" style="height: 20px;"></a>
-    <br/>
+</p>
+<p align="center">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/tests-nightly.yml/badge.svg" alt="tests-nightly">
    <img src="https://github.com/axolotl-ai-cloud/axolotl/actions/workflows/multi-gpu-e2e.yml/badge.svg" alt="multigpu-semi-weekly tests">
 </p>
@@ -45,13 +41,9 @@ Features:
 ## Table of Contents
 - [Axolotl](#axolotl)
  - [Table of Contents](#table-of-contents)
-  - [Quickstart ⚡](#quickstart-)
-    - [Edge Builds](#edge-builds-)
-    - [Axolotl CLI Usage](#axolotl-cli-usage)
-  - [Badge ❤🏷️](#badge-️)
-  - [Contributing 🤝](#contributing-)
-  - [Sponsors 🤝❤](#sponsors-)
  - [Axolotl supports](#axolotl-supports)
+  - [Quickstart ⚡](#quickstart-)
+    - [Usage](#usage)
  - [Advanced Setup](#advanced-setup)
    - [Environment](#environment)
      - [Docker](#docker)
@@ -83,6 +75,14 @@ Features:
    - [Tokenization Mismatch b/w Inference \& Training](#tokenization-mismatch-bw-inference--training)
  - [Debugging Axolotl](#debugging-axolotl)
  - [Need help? 🙋](#need-help-)
+  - [Badge ❤🏷️](#badge-️)
+  - [Community Showcase](#community-showcase)
+  - [Contributing 🤝](#contributing-)
+  - [Sponsors 🤝❤](#sponsors-)
+      - [💎 Diamond Sponsors - Contact directly](#-diamond-sponsors---contact-directly)
+      - [🥇 Gold Sponsors - $5000/mo](#-gold-sponsors---5000mo)
+      - [🥈 Silver Sponsors - $1000/mo](#-silver-sponsors---1000mo)
+      - [🥉 Bronze Sponsors - $500/mo](#-bronze-sponsors---500mo)

 </td>
 <td>
@@ -105,148 +105,6 @@ Features:
 </tr>
 </table>

-## Quickstart ⚡
-
-Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
-
-**Requirements**: *Nvidia* GPU (Ampere architecture or newer for `bf16` and Flash Attention) or *AMD* GPU, Python >=3.10 and PyTorch >=2.3.1.
-
-```bash
-pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
-
-# download examples and optionally deepspeed configs to the local path
-axolotl fetch examples
-axolotl fetch deepspeed_configs  # OPTIONAL
-
-# finetune using lora
-axolotl train examples/llama-3/lora-1b.yml
-```
-
-### Edge Builds 🏎️
-
-If you're looking for the latest features and updates between releases, you'll need to install
-from source.
-
-```bash
-git clone https://github.com/axolotl-ai-cloud/axolotl.git
-cd axolotl
-pip3 install packaging ninja
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
-```
-
-### Axolotl CLI Usage
-We now support a new, more streamlined CLI using [click](https://click.palletsprojects.com/en/stable/).
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" axolotl preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-axolotl train examples/llama-3/lora-1b.yml
-
-# inference
-axolotl inference examples/llama-3/lora-1b.yml \
-    --lora-model-dir="./outputs/lora-out"
-
-# gradio
-axolotl inference examples/llama-3/lora-1b.yml \
-    --lora-model-dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-axolotl train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-We've also added a new command for fetching `examples` and `deepspeed_configs` to your
-local machine. This will come in handy when installing `axolotl` from PyPI.
-
-```bash
-# Fetch example YAML files (stores in "examples/" folder)
-axolotl fetch examples
-
-# Fetch deepspeed config files (stores in "deepspeed_configs/" folder)
-axolotl fetch deepspeed_configs
-
-# Optionally, specify a destination folder
-axolotl fetch examples --dest path/to/folder
-```
-
-### Legacy Usage
-<details>
-
-<summary>Click to Expand</summary>
-
-While the Axolotl CLI is the preferred method for interacting with axolotl, we
-still support the legacy `-m axolotl.cli.*` usage.
-
-```bash
-# preprocess datasets - optional but recommended
-CUDA_VISIBLE_DEVICES="0" python -m axolotl.cli.preprocess examples/llama-3/lora-1b.yml
-
-# finetune lora
-accelerate launch -m axolotl.cli.train examples/llama-3/lora-1b.yml
-
-# inference
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-    --lora_model_dir="./outputs/lora-out"
-
-# gradio
-accelerate launch -m axolotl.cli.inference examples/llama-3/lora-1b.yml \
-    --lora_model_dir="./outputs/lora-out" --gradio
-
-# remote yaml files - the yaml config can be hosted on a public URL
-# Note: the yaml config must directly link to the **raw** yaml
-accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/llama-3/lora-1b.yml
-```
-
-</details>
-
-## Badge ❤🏷️
-
-Building something cool with Axolotl? Consider adding a badge to your model card.
-
-```markdown
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-```
-
-[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
-
-## Sponsors 🤝❤
-
-If you love axolotl, consider sponsoring the project by reaching out directly to [wing@axolotl.ai](mailto:wing@axolotl.ai).
-
---
-
- [Modal](https://modal.com/) Modal lets you run data/AI jobs in the cloud, by just writing a few lines of Python. Customers use Modal to deploy Gen AI models at large scale, fine-tune LLM models, run protein folding simulations, and much more.
-
---
-
-## Contributing 🤝
-
-Please read the [contributing guide](./.github/CONTRIBUTING.md)
-
-Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
-
-PRs are **greatly welcome**!
-
-Please run the quickstart instructions followed by the below to setup env:
-```bash
-pip3 install -r requirements-dev.txt -r requirements-tests.txt
-pre-commit install
-
-# test
-pytest tests/
-
-# optional: run against all files
-pre-commit run --all-files
-```
-
-Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
-
-<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
-  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
-</a>
-
 ## Axolotl supports

 |             | fp16/fp32 | lora | qlora | gptq | gptq w/flash attn | flash attn | xformers attn |
@@ -272,6 +130,41 @@ Thanks to all of our contributors to date. Help drive open source AI progress fo
 ❌: not supported
 ❓: untested

+## Quickstart ⚡
+
+Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
+
+**Requirements**: Nvidia GPU (Ampere architecture or newer for `bf16` and Flash Attention), Python >=3.10 and PyTorch >=2.3.1.
+
+```bash
+git clone https://github.com/axolotl-ai-cloud/axolotl
+cd axolotl
+
+pip3 install packaging ninja
+pip3 install -e '.[flash-attn,deepspeed]'
+```
+
+### Usage
+```bash
+# preprocess datasets - optional but recommended
+CUDA_VISIBLE_DEVICES="0" python -m axolotl.cli.preprocess examples/openllama-3b/lora.yml
+
+# finetune lora
+accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
+
+# inference
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./outputs/lora-out"
+
+# gradio
+accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
+    --lora_model_dir="./outputs/lora-out" --gradio
+
+# remote yaml files - the yaml config can be hosted on a public URL
+# Note: the yaml config must directly link to the **raw** yaml
+accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/examples/openllama-3b/lora.yml
+```
+
 ## Advanced Setup

 ### Environment
@@ -320,7 +213,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
  3. Install Axolotl along with python dependencies
        ```bash
        pip3 install packaging
-        pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+        pip3 install -e '.[flash-attn,deepspeed]'
        ```
  4. (Optional) Login to Huggingface to use gated models/datasets.
        ```bash
@@ -399,7 +292,7 @@ Please use WSL or Docker!

 Use the below instead of the install method in QuickStart.
 ```
-pip3 install --no-build-isolation -e '.'
+pip3 install -e '.'
 ```
 More info: [mac.md](/docs/mac.qmd)

@@ -789,6 +682,86 @@ See [this debugging guide](docs/debugging.qmd) for tips on debugging Axolotl, al

 ## Need help? 🙋

-Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.
+Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.

-Need dedicated support? Please contact us at [✉️wing@axolotl.ai](ailto:wing@axolotl.ai) for dedicated support options.
+Need dedicated support? Please contact us at [✉️wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org) for dedicated support options.
+
+## Badge ❤🏷️
+
+Building something cool with Axolotl? Consider adding a badge to your model card.
+
+```markdown
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+```
+
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+
+## Community Showcase
+
+Check out some of the projects and models that have been built using Axolotl! Have a model you'd like to add to our Community Showcase? Open a PR with your model.
+
+Open Access AI Collective
+- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b-fixed)
+- [Manticore 13b](https://huggingface.co/openaccess-ai-collective/manticore-13b)
+- [Hippogriff 30b](https://huggingface.co/openaccess-ai-collective/hippogriff-30b-chat)
+
+PocketDoc Labs
+- [Dan's PersonalityEngine 13b LoRA](https://huggingface.co/PocketDoc/Dans-PersonalityEngine-13b-LoRA)
+
+## Contributing 🤝
+
+Please read the [contributing guide](./.github/CONTRIBUTING.md)
+
+Bugs? Please check the [open issues](https://github.com/axolotl-ai-cloud/axolotl/issues/bug) else create a new Issue.
+
+PRs are **greatly welcome**!
+
+Please run the quickstart instructions followed by the below to set up your environment:
+```bash
+pip3 install -r requirements-dev.txt -r requirements-tests.txt
+pre-commit install
+
+# test
+pytest tests/
+
+# optional: run against all files
+pre-commit run --all-files
+```
+
+Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
+
+<a href="https://github.com/axolotl-ai-cloud/axolotl/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=openaccess-ai-collective/axolotl" alt="contributor chart by https://contrib.rocks"/>
+</a>
+
+## Sponsors 🤝❤
+
+OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
+[NanoCode012](https://github.com/NanoCode012), [tmm1](https://github.com/tmm1),
+[mhenrichsen](https://github.com/mhenrichsen), [casper-hansen](https://github.com/casper-hansen),
+[hamelsmu](https://github.com/hamelsmu) and many more who help us accelerate forward by fixing bugs, answering
+community questions and implementing new features. Axolotl needs donations from sponsors for the compute needed to
+run our unit & integration tests, troubleshooting community issues, and providing bounties. If you love axolotl,
+consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsors/OpenAccess-AI-Collective),
+[Ko-fi](https://ko-fi.com/axolotl_ai) or reach out directly to
+[wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org).
+
+---
+
+#### 💎 Diamond Sponsors - [Contact directly](mailto:wing@openaccessaicollective.org)
+
+---
+
+#### 🥇 Gold Sponsors - $5000/mo
+
+---
+
+#### 🥈 Silver Sponsors - $1000/mo
+
+---
+
+#### 🥉 Bronze Sponsors - $500/mo
+
+ - [JarvisLabs.ai](https://jarvislabs.ai)
+
+---
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -4,6 +4,7 @@ ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
 ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}"
 ENV CUDA="{{ CUDA }}"
+ENV BNB_CUDA_VERSION="{{ CUDA }}"
 ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
 ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
@@ -31,9 +32,9 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
    fi

 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/cicd/cicd.sh
+++ b/cicd/cicd.sh
@@ -1,10 +1,6 @@
 #!/bin/bash
 set -e

-python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
-
-pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
-# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
+pytest -v --durations=10 -n8 --ignore=tests/e2e/ /workspace/axolotl/tests/
+pytest -v --durations=10 -n1 --dist loadfile -v /workspace/axolotl/tests/e2e/patched/ /workspace/axolotl/tests/e2e/integrations/
 pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
--- a/deepspeed_configs/zero1_torch_compile.json
+++ b/deepspeed_configs/zero1_torch_compile.json
@@ -1,27 +0,0 @@
-{
-  "zero_optimization": {
-    "stage": 1,
-    "overlap_comm": true
-  },
-  "bf16": {
-    "enabled": "auto"
-  },
-  "fp16": {
-    "enabled": "auto",
-    "auto_cast": false,
-    "loss_scale": 0,
-    "initial_scale_power": 32,
-    "loss_scale_window": 1000,
-    "hysteresis": 2,
-    "min_loss_scale": 1
-  },
-  "compile": {
-    "disable": false,
-    "backend": "inductor"
-  },
-  "gradient_accumulation_steps": "auto",
-  "gradient_clipping": "auto",
-  "train_batch_size": "auto",
-  "train_micro_batch_size_per_gpu": "auto",
-  "wall_clock_breakdown": false
-}
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"

 ENV PYTORCH_VERSION=$PYTORCH_VERSION
@@ -20,9 +21,9 @@ WORKDIR /workspace/axolotl

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,optimizers] $AXOLOTL_ARGS; \
    fi

 RUN python scripts/unsloth_install.py | sh
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -16,7 +16,7 @@ ENV PYTHON_VERSION=$PYTHON_VERSION
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

 RUN apt-get update \
-    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config && rm -rf /var/lib/apt/lists/* \
+    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev && rm -rf /var/lib/apt/lists/* \
    && wget \
    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && mkdir /root/.conda \
--- a/docker/Dockerfile-cloud
+++ b/docker/Dockerfile-cloud
@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

--- a/docker/Dockerfile-cloud-no-tmux
+++ b/docker/Dockerfile-cloud-no-tmux
@@ -2,7 +2,7 @@ ARG BASE_TAG=main
 FROM axolotlai/axolotl:$BASE_TAG

 ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
-ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
+ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

--- a/docker/Dockerfile-tests
+++ b/docker/Dockerfile-tests
@@ -5,6 +5,7 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 ARG AXOLOTL_EXTRAS=""
 ARG AXOLOTL_ARGS=""
 ARG CUDA="118"
+ENV BNB_CUDA_VERSION=$CUDA
 ARG PYTORCH_VERSION="2.1.2"
 ARG GITHUB_REF="main"

@@ -24,9 +25,9 @@ RUN git fetch origin +$GITHUB_REF && \

 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
    else \
-        pip install --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
+        pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \
    fi

 # So we can test the Docker image
--- a/docs/amd_hpc.qmd
+++ b/docs/amd_hpc.qmd
@@ -52,7 +52,7 @@ export GPU_ARCHS="gfx90a"
 cd flash-attention
 export PYTHON_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
 patch "${PYTHON_SITE_PACKAGES}/torch/utils/hipify/hipify_python.py" hipify_patch.patch
-pip install --no-build-isolation .
+pip install .
 ```

 ### 6. Install Axolotl
@@ -63,7 +63,7 @@ Clone and install Axolotl:
 git clone https://github.com/axolotl-ai-cloud/axolotl
 cd axolotl
 pip install packaging ninja
-pip install --no-build-isolation -e .
+pip install -e .
 ```

 ### 7. Apply xformers Workaround
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -127,40 +127,34 @@ datasets:
    # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
    # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
    chat_template: tokenizer_default
-
-    # Custom jinja chat template. Used only if `chat_template: jinja` or empty.
+    # Custom jinja template for chat template. This will only be used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
    chat_template_jinja:
-
-    # Key containing the messages (default: "messages")
+    # The key in the data example that contains the messages. Default is "messages".
    field_messages: messages
-    # Key for role in each message (default: "role")
+    # The key in the message turn that contains the role. Default is "role".
    message_field_role: role
-    # Key for content in each message (default:  "content")
+    # The key in the message turn that contains the content. Default is "content".
    message_field_content: content
-
-    # Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
+    # Optional[Dict[str, List]]. Roles mapping for the messages.
    roles:
      user: ["human", "user"]
-      assistant: ["gpt", "assistant"]
+      assistant: ["gpt", "assistant", "ai"]
      system: ["system"]
-      tool: ["tool"]

-    # IMPORTANT: The following fields determine which parts of the conversation to train on.
-    # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
-    # See examples at `docs/dataset-formats/conversation.qmd`
-    # Note: If the below 4 fields are empty, defaults to training only on the last message.
+    ## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only the last message is trained on.

    # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
-    roles_to_train: ["assistant"]  # default
+    roles_to_train: ["gpt", "assistant"]
    # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
    # - all: train on all EOS tokens
-    # - turn (default): train on the EOS token at the end of each trainable turn
+    # - turn: train on the EOS token at the end of each trainable turn
    # - last: train on the last EOS token in the conversation
    train_on_eos: last
    # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
    message_field_training: training
    # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
    # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
+    # See example at `docs/dataset-formats/conversation.qmd`
    message_field_training_detail: train_detail


@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200

-# Use batch flattening for speedups when not using sample_packing
-batch_flattening:
-
 # Passed through to transformers when loading the model when launched without accelerate
 # Use `sequential` when training w/ model parallelism to limit memory
 device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
 output_dir: ./completed-model

 # Whether to use torch.compile and which backend to use
-# setting to `auto` will enable torch compile when torch>=2.5.1
-torch_compile:  # Optional[Union[Literal["auto"], bool]]
+torch_compile:  # bool
 torch_compile_backend:  # Optional[str]

 # Training hyperparameters
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
 eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]

-profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
-                # see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
-                # snapshots can be visualized @ https://pytorch.org/memory_viz
-
 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

--- a/docs/dataset-formats/conversation.qmd
+++ b/docs/dataset-formats/conversation.qmd
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
 datasets:
  - path: ...
    type: chat_template
-    roles_to_train:
-    train_on_eos:
 ```

 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
 datasets:
  - path: ...
    type: chat_template
-    roles_to_train: ["assistant"]  # default value
+    roles_to_train: ["assistant"]
 ```

 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
 datasets:
  - path: ...
    type: chat_template
+    roles_to_train: ["assistant"]
 ```

 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
 datasets:
  - path: ...
    type: chat_template
+    roles_to_train: ["assistant"]
 ```

 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
--- a/docs/debugging.qmd
+++ b/docs/debugging.qmd
@@ -71,7 +71,7 @@ Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/us

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 #### Remote Hosts
@@ -212,7 +212,7 @@ You will now be in the container.  Next, perform an editable install of Axolotl:

 ```bash
 pip3 install packaging
-pip3 install --no-build-isolation -e '.[flash-attn,deepspeed]'
+pip3 install -e '.[flash-attn,deepspeed]'
 ```

 ### Attach To Container
--- a/docs/lr_groups.qmd
+++ b/docs/lr_groups.qmd
@@ -1,29 +0,0 @@
---
-title: Learning Rate Groups
-description: "Setting different learning rates by module name"
---
-
-## Background
-
-Inspired by LoRA+, Axolotl allows practitioners to specify separate learning rates for each module or groups of
-modules in a model.
-
-## Example
-
-```yaml
-lr_groups:
-  - name: o_proj
-    modules:
-      - self_attn.o_proj.weight
-    lr: 1e-6
-  - name: q_proj
-    modules:
-      - model.layers.2.self_attn.q_proj.weight
-    lr: 1e-5
-
-learning_rate: 2e-5
-```
-
-In this example, we have a default learning rate of 2e-5 across the entire model, but we have a separate learning rate
-of 1e-6 for all the self attention `o_proj` modules across all layers, and a learning are of 1e-5 to the 3rd layer's
-self attention `q_proj` module.
--- a/docs/rlhf.qmd
+++ b/docs/rlhf.qmd
@@ -52,26 +52,6 @@ datasets:
    type: chat_template.argilla
 ```

-
-#### KTO
-
-```yaml
-rl: kto
-rl_beta: 0.5
-kto_desirable_weight: 0.2
-
-remove_unused_columns: false
-
-datasets:
-  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
-    type: llama3.ultra
-    split: train
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-```
-
 #### Using local dataset files
 ```yaml
 datasets:
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -1,10 +1,6 @@
 base_model: cerebras/btlm-3b-8k-base
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -1,7 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -24,7 +24,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "!pip install --no-build-isolation axolotl[deepspeed]"
+    "!pip install axolotl[deepspeed]"
   ]
  },
  {
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: true
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -1,6 +1,4 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -1,7 +1,4 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -1,15 +1,10 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
-
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer

 load_in_8bit: false
 # enable 4bit for QLoRA
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -1,10 +1,7 @@
 # use google/gemma-7b if you have access
 base_model: mhenrichsen/gemma-7b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-9b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-2b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForSequenceClassification
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -1,7 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -1,6 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -1,8 +1,5 @@
 base_model: ai21labs/AI21-Jamba-1.5-Large
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_4bit: true
 strict: false
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -1,10 +1,6 @@
 base_model: huggyllama/llama-7b
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 datasets:
  - path: openaccess-ai-collective/jeopardy
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -1,13 +1,8 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 gptq: true
 gptq_disable_exllama: true
-
+model_type: AutoModelForCausalLM
+tokenizer_type: LlamaTokenizer
 tokenizer_use_fast: true
 tokenizer_legacy: true
 load_in_8bit: false
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3-vision/lora-11b.yaml
+++ b/examples/llama-3-vision/lora-11b.yaml
@@ -1,9 +1,5 @@
 base_model: alpindale/Llama-3.2-11B-Vision-Instruct
-# optionally might have model_type or tokenizer_type or processor_type
 processor_type: AutoProcessor
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 strict: false

 # these 3 lines are needed for now to handle vision chat templates w images
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 plugins:
  - axolotl.integrations.liger.LigerPlugin
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -1,76 +0,0 @@
-base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: false
-load_in_4bit: false
-strict: false
-
-datasets:
-  - path: teknium/GPT4-LLM-Cleaned
-    type: alpaca
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.1
-output_dir: ./outputs/lora-out
-
-adapter: lora
-lora_model_dir:
-
-sequence_len: 2048
-sample_packing: true
-eval_sample_packing: true
-pad_to_sequence_len: true
-
-lora_r: 16
-lora_alpha: 32
-lora_dropout: 0.05
-lora_fan_in_fan_out:
-lora_target_modules:
-  - gate_proj
-  - down_proj
-  - up_proj
-  - q_proj
-  - v_proj
-  - k_proj
-  - o_proj
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 2
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-
-loss_watchdog_threshold: 5.0
-loss_watchdog_patience: 3
-
-warmup_steps: 10
-evals_per_epoch: 4
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-  pad_token: "<|end_of_text|>"
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/qlora-1b-kto.yaml
+++ b/examples/llama-3/qlora-1b-kto.yaml
@@ -1,77 +0,0 @@
-base_model: meta-llama/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-load_in_8bit: false
-load_in_4bit: true
-strict: false
-
-rl: kto
-rl_beta: 0.5
-kto_desirable_weight: 0.2
-
-datasets:
-  - path: argilla/ultrafeedback-binarized-preferences-cleaned-kto
-    type: llama3.ultra
-    split: train
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.0
-output_dir: ./outputs/qlora-out
-
-remove_unused_columns: false
-
-adapter: qlora
-lora_model_dir:
-
-sequence_len: 2048
-sample_packing: false  # not supported with kto
-eval_sample_packing: false
-pad_to_sequence_len: false
-
-lora_r: 32
-lora_alpha: 64
-lora_dropout: 0.05
-lora_target_linear: true
-lora_fan_in_fan_out:
-
-wandb_project:
-wandb_entity:
-wandb_watch:
-wandb_name:
-wandb_log_model:
-
-gradient_accumulation_steps: 1
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: true
-
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-
-warmup_steps: 20
-evals_per_epoch: 4
-eval_table_size:
-eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-  pad_token: "<|end_of_text|>"
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -1,6 +1,4 @@
-base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
+base_model: meta-llama/Llama-3.2-1B

 load_in_8bit: false
 load_in_4bit: true
@@ -24,6 +22,7 @@ pad_to_sequence_len: true
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
+lora_target_linear: true
 lora_fan_in_fan_out:
 lora_target_modules:
  - gate_proj
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -1,8 +1,5 @@
 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_4bit: true
 strict: false
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -1,9 +1,6 @@
 base_model: casperhansen/llama-3-70b-fp16
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer  # PreTrainedTokenizerFast
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llava/lora-7b.yaml
+++ b/examples/llava/lora-7b.yaml
@@ -0,0 +1,63 @@
+base_model: llava-hf/llava-1.5-7b-hf
+processor_type: AutoProcessor
+strict: false
+
+# these 3 lines are needed for now to handle vision chat templates with images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: llava
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: true
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -1,10 +1,7 @@
 base_model: state-spaces/mamba-2.8b
-# optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -4,11 +4,8 @@
 #face problems with the special tokens.

 base_model: mistralai/Mistral-7B-Instruct-v0.2
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -1,9 +1,5 @@
 base_model: mosaicml/mpt-7b
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true  # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3.5-mini-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-2
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -1,11 +1,7 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 chat_template: phi_3

 load_in_8bit: false
--- a/examples/pixtral/lora-12b.yml
+++ b/examples/pixtral/lora-12b.yml
@@ -0,0 +1,65 @@
+base_model: mistral-community/pixtral-12b
+processor_type: AutoProcessor
+strict: false
+
+# the following 3 lines are needed for now to handle vision chat templates with images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: pixtral
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: false # PixtralVisionModel does not support Flash Attention 2.0 yet
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
+  pad_token: <|end_of_text|>
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -1,11 +1,7 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch*  # prefer safetensors
-# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 gptq: false
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 datasets:
  - path: teknium/GPT4-LLM-Cleaned
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 trust_remote_code: true

--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 trust_remote_code: true

--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/examples/qwen2-vl/lora-7b.yaml
+++ b/examples/qwen2-vl/lora-7b.yaml
@@ -0,0 +1,63 @@
+base_model: Qwen/Qwen2-VL-7B-Instruct
+processor_type: AutoProcessor
+strict: false
+
+# the following 3 lines are needed for now to handle vision chat templates with images
+skip_prepare_dataset: true
+remove_unused_columns: false
+sample_packing: false
+
+chat_template: qwen2_vl
+datasets:
+  - path: HuggingFaceH4/llava-instruct-mix-vsft
+    type: chat_template
+    split: train[:1%]
+    field_messages: messages
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.0
+output_dir: ./outputs/out
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 8192
+pad_to_sequence_len: false
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: true
+fp16:
+tf32: true
+
+gradient_checkpointing: true
+local_rank:
+logging_steps: 1
+flash_attention: true
+eager_attention:
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -1,6 +1,4 @@
 base_model: Qwen/Qwen2.5-0.5B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 strict: false

--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen2-7B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true

 load_in_8bit: false
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
NanoCode012	fbf3ca86c9	feat: add support for qwen25 vl for multimodal	2025-02-18 12:42:29 +07:00
Sunny	2de866e92f	revert seq len to 8192	2024-12-08 22:30:20 -05:00
Sunny	295e07dcca	settings	2024-12-08 22:22:18 -05:00
bursteratom	3c07b6d6b1	lint	2024-12-06 16:06:57 -05:00
bursteratom	89dae7dc6d	lora_target_module	2024-12-06 15:41:09 -05:00
bursteratom	1b54af8e54	lora config	2024-12-06 15:27:18 -05:00
bursteratom	ca7b56cba3	lora config	2024-12-06 15:26:06 -05:00
bursteratom	ea8269d2eb	lora config	2024-12-06 15:23:24 -05:00
bursteratom	13ca7ed087	comment out lora target	2024-12-06 15:21:08 -05:00
bursteratom	0dfd8541ee	lora config qwen2vl	2024-12-06 14:56:51 -05:00
bursteratom	75e1d3537f	qwen2_vl get_text_config	2024-12-06 14:54:06 -05:00
bursteratom	2b7f3bd6ab	qwen2_vl get_text_config	2024-12-06 14:52:17 -05:00
bursteratom	d85a229afe	get_text_config	2024-12-06 14:50:05 -05:00
bursteratom	355cd7c872	update is_multimodal requirement to include qwen2_vl	2024-12-06 14:43:50 -05:00
bursteratom	eab1638686	lint	2024-12-06 14:37:32 -05:00
bursteratom	a3a4d22709	config init qwen2-vl chat template	2024-12-06 14:24:03 -05:00
bursteratom	f9eb7d8663	qwen2 example	2024-12-06 14:22:08 -05:00
bursteratom	343771a6d3	lint	2024-12-06 13:15:49 -05:00
bursteratom	d2c32d0cba	lint	2024-12-06 13:04:42 -05:00
bursteratom	cec9887609	add llava chat template to config	2024-12-06 12:57:20 -05:00
bursteratom	88b2cae748	llava template	2024-12-06 12:54:43 -05:00
bursteratom	aea2565938	for test only	2024-12-06 11:54:07 -05:00
bursteratom	1ad56303b2	lint	2024-12-05 15:34:04 -05:00
bursteratom	dc055a4ef7	lint	2024-12-05 14:59:51 -05:00
bursteratom	169116a50f	llava example	2024-12-05 12:58:30 -05:00
bursteratom	43e412f660	comment	2024-12-04 13:18:25 -05:00
Wing Lian	7aa57803e1	fix optimizer reset for relora sft (#1414 ) * fix optimizer reset * set states to reset for 8bit optimizers and handle quantile runtime error for embeddings * fix relora test to check grad_norm * use flash attn for relora and tweak hyperparams for test * fix messages field for test dataset	2024-12-04 12:33:29 -05:00
NanoCode012	1969fa3bf0	fix(readme): update cuda instructions during preprocess (#2114 ) [skip ci]	2024-12-04 12:33:29 -05:00
NanoCode012	4078f37076	feat: add cut_cross_entropy (#2091 ) * feat: add cut_cross_entropy * fix: add to input * fix: remove from setup.py * feat: refactor into an integration * chore: ignore lint * feat: add test for cce * fix: set max_steps for liger test * chore: Update base model following suggestion Co-authored-by: Wing Lian <wing.lian@gmail.com> * chore: update special_tokens following suggestion Co-authored-by: Wing Lian <wing.lian@gmail.com> * chore: remove with_temp_dir following comments * fix: plugins aren't loaded * chore: update quotes in error message * chore: lint * chore: lint * feat: enable FA on test * chore: refactor get_pytorch_version * fix: lock cce commit version * fix: remove subclassing UT * fix: downcast even if not using FA and config check * feat: add test to check different attentions * feat: add install to CI * chore: refactor to use parametrize for attention * fix: pytest not detecting test * feat: handle torch lower than 2.4 * fix args/kwargs to match docs * use release version cut-cross-entropy==24.11.4 * fix quotes * fix: use named params for clarity for modal builder * fix: handle install from pip * fix: test check only top level module install * fix: re-add import check * uninstall existing version if no transformers submodule in cce * more dataset fixtures into the cache --------- Co-authored-by: Wing Lian <wing.lian@gmail.com> Co-authored-by: Wing Lian <wing@axolotl.ai>	2024-12-04 12:33:29 -05:00
Wing Lian	f073af6d99	fix merge conflict of duplicate max_steps in config for relora (#2116 )	2024-12-04 12:33:29 -05:00
Wing Lian	139d2612fa	fix so inference can be run against quantized models without adapters (#1834 ) * fix so inference can be run against quantized models without adapters * Update error msg [skip e2e] Co-authored-by: NanoCode012 <nano@axolotl.ai> --------- Co-authored-by: NanoCode012 <nano@axolotl.ai>	2024-12-04 12:33:29 -05:00
Sunny Liu	20573fd13e	Add ds model card, rebased (#2101 ) [skip ci] * rebased add_ds_model_card * manual rebasing * fix redundancy * lint * include case when ds_tag is none * conform to kwargs in create_model_card	2024-12-04 12:33:29 -05:00
NanoCode012	2b7b4af81c	fix(vlm): handle legacy conversation data format and check image in data (#2018 ) [skip ci] * fix: handle legacy conversation data format and check image in data * feat: add test for llama vision * feat: add max_steps to test * fix: incorrect indent and return preprocess * feat: use smaller model and dataset * chore: add extra config for sharegpt dataset	2024-12-04 12:33:29 -05:00
Sunny Liu	d56260c8d5	Check torch version for ADOPT optimizer + integrating new ADOPT updates (#2104 ) * added torch check for adopt, wip * lint * gonna put torch version checking somewhere else * added ENVcapabilities class for torch version checking * lint + pydantic * ENVCapabilities -> EnvCapabilities * forgot to git add v0_4_1/__init__.py * removed redundancy * add check if env_capabilities not specified * make env_capabilities compulsory [skip e2e] * fixup env_capabilities * modified test_validation.py to accomodate env_capabilities * adopt torch version test [skip e2e] * raise error * test correct torch version * test torch version above requirement * Update src/axolotl/utils/config/models/input/v0_4_1/__init__.py Co-authored-by: Wing Lian <wing.lian@gmail.com> * removed unused is_totch_min --------- Co-authored-by: Wing Lian <wing@axolotl.ai> Co-authored-by: Wing Lian <wing.lian@gmail.com>	2024-12-04 12:33:29 -05:00
Wing Lian	cac785ec0e	use pytest sugar and verbose for more info during ci (#2112 ) [skip ci] * use pytest sugar and verbose for more info during ci * also run test suite when test requirements or cicd.sh changes * also on PR too	2024-12-04 12:33:29 -05:00
Wing Lian	e62991edef	make the eval size smaller for the resume test (#2111 ) [skip ci]	2024-12-04 12:33:29 -05:00
Wing Lian	fd9e7b55f6	build causal_conv1d and mamba-ssm into the base image (#2113 ) * build causal_conv1d and mamba-ssm into the base image * also build base images on changes to Dockerfile-base and base workflow yaml	2024-12-04 12:33:29 -05:00
Wing Lian	c0c53eb62f	various tests fixes for flakey tests (#2110 ) * add mhenrichsen/alpaca_2k_test with revision dataset download fixture for flaky tests * log slowest tests * pin pynvml==11.5.3 * fix load local hub path * optimize for speed w smaller models and val_set_size * replace pynvml * make the resume from checkpoint e2e faster * make tests smaller	2024-12-04 12:33:29 -05:00
Oliver Molenschot	b0fbd4d11d	Add Exact Deduplication Feature to Preprocessing Pipeline (#2072 ) * Add example YAML file for training Mistral using DPO * added deduplication code * Add exact deduplication feature and update examples * Improve deduplication for train/eval overlap Changed the deduplication function to use a more memory-efficient hashing method. Applied Git suggestions to improve clarity and maintainability.\n\nThe deduplication now handles cases where train and eval datasets have overlapping elements. * Improve deduplication for train/eval overlap Changed the deduplication function to use a more memory-efficient hashing method. Applied Git suggestions to improve clarity and maintainability.\n\nThe deduplication now handles cases where train and eval datasets have overlapping elements. * Apply suggestions from code review To handle the original case where we do not do deduplication Co-authored-by: Wing Lian <wing.lian@gmail.com> * Improve false collision detection to ensure dataset integrity - Added test cases to simulate and verify handling of forced hash collisions between datasets. - Ensured that datasets with identical hashes but different content are correctly identified, preventing incorrect deduplication. - Updated unit tests to include scenarios where collisions occur across both training and evaluation datasets, as well as within a single dataset. * Moved the constants file to the tests folder - Relocated `constants.py` to the `tests` folder to improve modularity and maintain a clear separation between source and test files. - Renamed `cicd/tests.py` to `cicd/cicd_tests.py` to resolve a conflict with `tests/__init__.py`, which caused Mypy to fail due to duplicate module names. - Updated all references to `cicd.tests` in the codebase to `cicd.cicd_tests` to reflect the renaming and ensure compatibility. - These changes ensure Mypy passes the pre-commit hook and maintain alignment with the project's structure. 
* revert some changes from previous commit and fix relative import --------- Co-authored-by: Wing Lian <wing.lian@gmail.com> Co-authored-by: Wing Lian <wing@axolotl.ai>	2024-12-04 12:33:29 -05:00
Wing Lian	1a70d4d6a4	add e2e tests for Unsloth qlora and test the builds (#2093 ) * see if unsloth installs cleanly in ci * check unsloth install on regular tests, not sdist * fix ampere check exception for ci * use cached_property instead * add an e2e test for unsloth qlora * reduce seq len and mbsz to prevent oom in ci * add checks for fp16 and sdp_attention * pin unsloth to a specific release * add unsloth to docker image too * fix flash attn xentropy patch * fix loss, add check for loss when using fa_xentropy * fix special tokens for test * typo * test fa xentropy with and without gradient accum * pr feedback changes	2024-12-04 12:33:29 -05:00
Wing Lian	d8787a433f	support seperate lr for embeddings, similar to loraplus (#1910 ) [skip ci] * support seperate lr for embeddings, similar to loraplus * add test case for train w lr embedding scale * use kwarg for optimizer * make sure to handle the optimizer creation * make sure to handle for embedding_lr too * use smollm for e2e, check for embeddings lr first before wdecay	2024-12-04 12:33:29 -05:00
NanoCode012	e775422269	fix: ds3 and fsdp lmbench eval (#2102 ) [ski[p ci] * fix: ds3 and fsdp lmbench eval * chore: update comment * fix: test signature	2024-12-04 12:33:29 -05:00
Wing Lian	97178f5960	add finetome dataset to fixtures, check eval_loss in test (#2106 ) [skip ci] * add finetome dataset to fixtures, check eval_loss in test * add qwen 0.5b to pytest session fixture	2024-12-04 12:33:29 -05:00
bursteratom	4698eed43f	set pixtral chat template	2024-12-04 12:11:21 -05:00
bursteratom	f84c3b37e7	lint	2024-12-04 11:59:45 -05:00
bursteratom	c39971c659	stuff	2024-11-27 10:52:36 -05:00
bursteratom	33a178c788	val config pixtral chat template	2024-11-27 10:36:23 -05:00
bursteratom	db15605e7e	pixral chat template	2024-11-27 10:34:19 -05:00
bursteratom	9e112bc8b5	lint	2024-11-27 10:33:35 -05:00
bursteratom	e038410778	lint	2024-11-27 10:24:37 -05:00
bursteratom	f4385c3cf4	add special tokens	2024-11-27 10:18:45 -05:00
bursteratom	d58c772df6	pixtral flash-attn false	2024-11-27 10:16:17 -05:00
bursteratom	69265a53b5	stuff	2024-11-27 09:53:41 -05:00