Compare commits

23 Commits

feat/phi_3 ... v0.11.0.po

| Author | SHA1 | Date |
|---|---|---|
| | c620a218b8 | |
| | c6d69d5c1b | |
| | 4ff96a2526 | |
| | 89e99eaaa7 | |
| | 6ed501f6dc | |
| | 8c6a6ea6eb | |
| | 78bff4925e | |
| | b237c8a3f3 | |
| | 1032e22650 | |
| | d68cc1e8ab | |
| | 21f1bf4805 | |
| | de2c5ba103 | |
| | 9c0d7ee761 | |
| | 22d4a838dc | |
| | a108e5db56 | |
| | faff0cff41 | |
| | 759cefb741 | |
| | 69cd49a7aa | |
| | 5a961ecadf | |
| | b37ddf9778 | |
| | bf38e507fb | |
| | a5946ff1f0 | |
| | 70ca1b2291 | |
14 .github/workflows/base.yml (vendored)
@@ -5,11 +5,13 @@ on:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- 'Dockerfile-base'
|
||||
- 'docker/Dockerfile-base'
|
||||
- 'docker/Dockerfile-uv-base'
|
||||
- '.github/workflows/base.yml'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'Dockerfile-base'
|
||||
- 'docker/Dockerfile-base'
|
||||
- 'docker/Dockerfile-uv-base'
|
||||
- '.github/workflows/base.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -27,11 +29,11 @@ jobs:
|
||||
cuda_version: 12.4.1
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
pytorch: 2.6.0
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||
dockerfile: "Dockerfile-base"
|
||||
- cuda: "124"
|
||||
cuda_version: 12.4.1
|
||||
- cuda: "126"
|
||||
cuda_version: 12.6.3
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
@@ -41,7 +43,7 @@ jobs:
|
||||
cuda_version: 12.6.3
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
pytorch: 2.7.0
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||
dockerfile: "Dockerfile-base"
|
||||
- cuda: "126"
|
||||
|
||||
20 .github/workflows/main.yml (vendored)
@@ -15,15 +15,15 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.7.0
|
||||
axolotl_extras: vllm
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
@@ -82,17 +82,17 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
axolotl_extras:
|
||||
is_latest: true
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.7.0
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
|
||||
7 .github/workflows/multi-gpu-e2e.yml (vendored)
@@ -33,13 +33,6 @@ jobs:
|
||||
axolotl_extras:
|
||||
num_gpus: 2
|
||||
nightly_build: "true"
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
axolotl_extras:
|
||||
num_gpus: 2
|
||||
nightly_build: "true"
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
|
||||
11 .github/workflows/nightlies.yml (vendored)
@@ -12,11 +12,6 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
axolotl_extras:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
@@ -68,10 +63,10 @@ jobs:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
pytorch: 2.6.0
|
||||
axolotl_extras:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
axolotl_extras:
|
||||
|
||||
123 .github/workflows/tests-nightly.yml (vendored)
@@ -18,116 +18,26 @@ jobs:
|
||||
env:
|
||||
SKIP: no-commit-to-branch
|
||||
|
||||
preload-cache:
|
||||
name: Preload HF cache
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python_version: ["3.11"]
|
||||
pytorch_version: ["2.6.0"]
|
||||
timeout-minutes: 20
|
||||
|
||||
env:
|
||||
AXOLOTL_IS_CI_CACHE_PRELOAD: "1"
|
||||
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Restore HF cache
|
||||
id: hf-cache-restore
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: |
|
||||
/home/runner/.cache/huggingface/hub/datasets--*
|
||||
/home/runner/.cache/huggingface/hub/models--*
|
||||
key: ${{ runner.os }}-hf-hub-cache-v2
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
cache: 'pip' # caching pip dependencies
|
||||
|
||||
- name: upgrade pip
|
||||
run: |
|
||||
pip3 install --upgrade pip
|
||||
pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
|
||||
|
||||
- name: Install PyTorch
|
||||
run: |
|
||||
pip3 install torch==${{ matrix.pytorch_version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip3 show torch
|
||||
pip3 install --no-build-isolation -U -e .
|
||||
python scripts/unsloth_install.py | sh
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
pip3 install -r requirements-dev.txt -r requirements-tests.txt
|
||||
|
||||
- name: Make sure PyTorch version wasn't clobbered
|
||||
run: |
|
||||
python -c "import torch; assert '${{ matrix.pytorch_version }}' in torch.__version__"
|
||||
|
||||
- name: Ensure axolotl CLI was installed
|
||||
run: |
|
||||
axolotl --help
|
||||
|
||||
- name: Pre-Download dataset fixture
|
||||
run: |
|
||||
huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
pytest -v tests/conftest.py
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
files: ./coverage.xml
|
||||
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
||||
fail_ci_if_error: false
|
||||
|
||||
- name: cleanup pip cache
|
||||
run: |
|
||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
||||
|
||||
- name: Save HF cache
|
||||
id: hf-cache
|
||||
uses: actions/cache/save@v4
|
||||
with:
|
||||
path: |
|
||||
/home/runner/.cache/huggingface/hub/datasets--*
|
||||
/home/runner/.cache/huggingface/hub/models--*
|
||||
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
|
||||
|
||||
pytest:
|
||||
name: PyTest
|
||||
runs-on: ubuntu-latest
|
||||
needs: [preload-cache]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
python_version: ["3.11"]
|
||||
pytorch_version: ["2.5.1", "2.6.0", "2.7.0"]
|
||||
pytorch_version: ["2.6.0", "2.7.0"]
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Restore HF cache
|
||||
id: hf-cache-restore
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: |
|
||||
/home/runner/.cache/huggingface/hub/datasets--*
|
||||
/home/runner/.cache/huggingface/hub/models--*
|
||||
key: ${{ runner.os }}-hf-hub-cache-v2
|
||||
- name: Restore Cache from S3
|
||||
id: hf-cache-restore-s3
|
||||
run: |
|
||||
mkdir -p /home/runner/.cache/huggingface/hub
|
||||
curl -L https://d1dttdx32dkk5p.cloudfront.net/hf-cache.tar.zst | tar -xf - -C /home/runner/.cache/huggingface/hub/ --use-compress-program unzstd
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
@@ -168,15 +78,11 @@ jobs:
|
||||
run: |
|
||||
axolotl --help
|
||||
|
||||
- name: Pre-Download dataset fixture
|
||||
run: |
|
||||
huggingface-cli download --repo-type=dataset axolotl-ai-internal/axolotl-oss-dataset-fixtures
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
||||
pytest -v tests/patched/
|
||||
pytest -v tests/cli/
|
||||
pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
||||
pytest -v --durations=10 tests/patched/
|
||||
pytest -v --durations=10 tests/cli/
|
||||
|
||||
- name: cleanup pip cache
|
||||
run: |
|
||||
@@ -193,15 +99,8 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
num_gpus: 1
|
||||
axolotl_extras:
|
||||
nightly_build: "true"
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
num_gpus: 1
|
||||
|
||||
30 .github/workflows/tests.yml (vendored)
@@ -52,7 +52,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python_version: ["3.11"]
|
||||
pytorch_version: ["2.5.1", "2.6.0", "2.7.1"]
|
||||
pytorch_version: ["2.6.0", "2.7.0", "2.7.1"]
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
@@ -102,9 +102,9 @@ jobs:
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/ --cov=axolotl --cov-report=xml
|
||||
pytest -v tests/patched/ --cov=axolotl --cov-append --cov-report=xml
|
||||
pytest -v tests/cli/ --cov=axolotl --cov-append --cov-report=xml
|
||||
pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/ --cov=axolotl --cov-report=xml
|
||||
pytest -v --durations=10 tests/patched/ --cov=axolotl --cov-append --cov-report=xml
|
||||
pytest -v --durations=10 tests/cli/ --cov=axolotl --cov-append --cov-report=xml
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v5
|
||||
@@ -125,7 +125,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python_version: ["3.11"]
|
||||
pytorch_version: ["2.5.1", "2.6.0", "2.7.1"]
|
||||
pytorch_version: ["2.6.0", "2.7.0", "2.7.1"]
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
@@ -175,9 +175,9 @@ jobs:
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
pytest -v -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
||||
pytest -v tests/patched/
|
||||
pytest -v tests/cli/
|
||||
pytest -v --durations=10 -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ --ignore=tests/cli/ tests/
|
||||
pytest -v --durations=10 tests/patched/
|
||||
pytest -v --durations=10 tests/cli/
|
||||
|
||||
- name: cleanup pip cache
|
||||
run: |
|
||||
@@ -198,7 +198,7 @@ jobs:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
pytorch: 2.7.1
|
||||
num_gpus: 1
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
@@ -252,18 +252,6 @@ jobs:
|
||||
python_version: "3.11"
|
||||
pytorch: 2.6.0
|
||||
num_gpus: 1
|
||||
axolotl_extras: llmcompressor
|
||||
- cuda: 124
|
||||
cuda_version: 12.4.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.5.1
|
||||
num_gpus: 1
|
||||
axolotl_extras:
|
||||
- cuda: 126
|
||||
cuda_version: 12.6.3
|
||||
python_version: "3.11"
|
||||
pytorch: 2.7.1
|
||||
num_gpus: 1
|
||||
axolotl_extras:
|
||||
- cuda: 128
|
||||
cuda_version: 12.8.1
|
||||
|
||||
@@ -36,7 +36,7 @@ repos:
|
||||
'pydantic>=2.5.3',
|
||||
]
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.8.5
|
||||
rev: 1.8.6
|
||||
hooks:
|
||||
- id: bandit
|
||||
args: [
|
||||
|
||||
@@ -2,4 +2,5 @@ include requirements.txt
|
||||
include README.md
|
||||
include LICENSE
|
||||
include src/setuptools_axolotl_dynamic_dependencies.py
|
||||
include src/axolotl/utils/chat_templates/templates/*.jinja
|
||||
recursive-include axolotl *.py
|
||||
|
||||
@@ -55,7 +55,7 @@ Features:

- NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
- Python 3.11
- PyTorch ≥2.5.1
- PyTorch ≥2.6.0

### Installation
@@ -9,6 +9,7 @@ ENV GITHUB_REF="{{ GITHUB_REF }}"
|
||||
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
||||
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
||||
ENV HF_HOME="{{ HF_HOME }}"
|
||||
ENV AXOLOTL_DATASET_PROCESSES="8"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
|
||||
|
||||
@@ -24,9 +24,9 @@ df_template = template_env.get_template("Dockerfile.jinja")
|
||||
df_args = {
|
||||
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
||||
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"),
|
||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"),
|
||||
"CUDA": os.environ.get("CUDA", "124"),
|
||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
|
||||
"CUDA": os.environ.get("CUDA", "126"),
|
||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||
"CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
|
||||
|
||||
@@ -24,9 +24,9 @@ df_template = template_env.get_template(dockerfile)
|
||||
df_args = {
|
||||
"AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
|
||||
"AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
|
||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.5.1"),
|
||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu124-2.5.1"),
|
||||
"CUDA": os.environ.get("CUDA", "124"),
|
||||
"PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"),
|
||||
"BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"),
|
||||
"CUDA": os.environ.get("CUDA", "126"),
|
||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
|
||||
|
||||
@@ -37,3 +37,7 @@ RUN git lfs install --skip-repo && \
|
||||
pip3 install awscli && \
|
||||
# The base image ships with `pydantic==1.8.2` which is not working
|
||||
pip3 install -U --no-cache-dir pydantic==1.10.10
|
||||
|
||||
RUN if [ "$PYTORCH_VERSION" = "2.6.0" ] && [ "$CUDA" = "124" ] ; then \
|
||||
FLASH_ATTENTION_FORCE_BUILD="TRUE" pip3 install --no-build-isolation flash-attn==2.8.0.post2; \
|
||||
fi
|
||||
|
||||
@@ -7,6 +7,7 @@ toc-depth: 3
|
||||
```{python}
|
||||
#| echo: false
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
def process_readme(integration_name):
|
||||
@@ -53,6 +54,24 @@ sections = [
|
||||
("LLMCompressor", "llm_compressor")
|
||||
]
|
||||
|
||||
for folder_name in os.listdir("../src/axolotl/integrations/"):
|
||||
if folder_name in [path for name, path in sections]:
|
||||
# skip if already in sections
|
||||
continue
|
||||
if os.path.exists(f"../src/axolotl/integrations/{folder_name}/README.md"):
|
||||
# grab the first heading in README.md as the section name
|
||||
with open(f"../src/axolotl/integrations/{folder_name}/README.md", "r") as f:
|
||||
txt = f.read()
|
||||
matches = re.search(r'^# (.*)\n?', txt, flags=re.MULTILINE)
|
||||
if matches:
|
||||
name = matches.group(1)
|
||||
else:
|
||||
continue
|
||||
sections.append((name, folder_name))
|
||||
|
||||
# sort sections by name
|
||||
sections = sorted(sections, key=lambda x: x[0])
|
||||
|
||||
for section_name, folder_name in sections:
|
||||
print(print_section(section_name, folder_name))
|
||||
```
|
||||
|
||||
@@ -36,7 +36,6 @@ Tags examples:
|
||||
- `main-base-py3.11-cu126-2.7.1`
|
||||
- `main-base-py3.11-cu126-2.6.0`
|
||||
- `main-base-py3.11-cu124-2.6.0`
|
||||
- `main-base-py3.11-cu124-2.5.1`
|
||||
|
||||
## Main
|
||||
|
||||
@@ -78,10 +77,9 @@ Tags examples:
|
||||
- `main-py3.11-cu126-2.7.1`
|
||||
- `main-py3.11-cu126-2.6.0`
|
||||
- `main-py3.11-cu124-2.6.0`
|
||||
- `main-py3.11-cu124-2.5.1`
|
||||
- `main-latest`
|
||||
- `main-20250303-py3.11-cu124-2.6.0`
|
||||
- `main-20250303-py3.11-cu124-2.5.1`
|
||||
- `main-20250303-py3.11-cu126-2.6.0`
|
||||
- `0.10.1`
|
||||
|
||||
## Cloud
|
||||
|
||||
12 docs/faq.qmd

@@ -51,6 +51,18 @@ description: Frequently asked questions

> pad_token: "..."
> ```

**Q: `IterableDataset error` or `KeyError: 'input_ids'` when using `preprocess` CLI**

> A: This happens when the `preprocess` CLI is used with `pretraining_dataset:` or `skip_prepare_dataset: true`. Use the `axolotl train` CLI directly instead, as these datasets are prepared on demand.

**Q: vLLM is not working with Axolotl**

> A: We currently recommend torch 2.6.0 for use with `vllm`. Please ensure you use the right version. For Docker, please use the `main-py3.11-cu124-2.6.0` tag.

**Q: FA2 2.8.0 `undefined symbol` runtime error on CUDA 12.4**

> A: There seems to be a wheel issue with FA2 2.8.0 on CUDA 12.4. Try CUDA 12.6 instead or downgrade to FA2 2.7.4. Please refer to the upstream issue: https://github.com/Dao-AILab/flash-attention/issues/1717.

### Chat templates

**Q: `jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____`**
@@ -20,7 +20,7 @@ To enable `QLoRA` with `FSDP`, you need to perform the following steps:

> See the [example config](#example-config) file in addition to reading these instructions.

1. Set `adapter: qlora` in your axolotl config file.
2. Enable FSDP in your axolotl config, as [described here](https://github.com/axolotl-ai-cloud/axolotl?tab=readme-ov-file#fsdp).
2. Enable FSDP in your axolotl config, as [described here](multi-gpu.qmd#sec-fsdp).
3. Use one of the supported model types: `llama`, `mistral` or `mixtral`.

## Example Config

@@ -15,7 +15,7 @@ This guide covers all the ways you can install and set up Axolotl for your environment.

- NVIDIA GPU (Ampere architecture or newer for `bf16` and Flash Attention) or AMD GPU
- Python ≥3.11
- PyTorch ≥2.5.1
- PyTorch ≥2.6.0

## Installation Methods {#sec-installation-methods}
@@ -66,6 +66,15 @@ Start from Stage 1 -> Stage 2 -> Stage 3.

:::

::: {.callout-tip}

Using ZeRO Stage 3 with Single-GPU training

ZeRO Stage 3 can be used for training on a single GPU by manually setting the environment variables (see the sketch after this callout):
`WORLD_SIZE=1 LOCAL_RANK=0 MASTER_ADDR=0.0.0.0 MASTER_PORT=29500`

:::
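A minimal sketch of that single-GPU ZeRO-3 launch, assuming only that the `axolotl` CLI is installed; the config path is a placeholder, and the environment variables are exactly the ones listed in the tip above, exported here via `subprocess` instead of the shell:

```python
import os
import subprocess

# Emulate a one-process "distributed" environment so DeepSpeed ZeRO Stage 3 can initialize.
env = dict(
    os.environ,
    WORLD_SIZE="1",
    LOCAL_RANK="0",
    MASTER_ADDR="0.0.0.0",
    MASTER_PORT="29500",
)

# "config.yml" is a placeholder path for your own axolotl config.
subprocess.run(["axolotl", "train", "config.yml"], env=env, check=True)
```

Setting the variables directly in the shell before running `axolotl train` achieves the same thing.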
## FSDP {#sec-fsdp}

### Basic FSDP Configuration {#sec-fsdp-config}
69 examples/devstral/README.md (Normal file)

@@ -0,0 +1,69 @@

# Finetune Devstral with Axolotl

Devstral Small is a 24B-parameter open-source model from MistralAI, available on Hugging Face as [Devstral-Small-2505](https://huggingface.co/mistralai/Devstral-Small-2505). This guide shows how to fine-tune it with Axolotl on multi-turn conversations with proper masking.

The model was fine-tuned on top of [Mistral-Small-3.1](https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Base-2503) without the vision layer and has a context window of up to 128k tokens.

## Getting started

1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from `main`, as Devstral support is only in nightly builds, or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).

Here is an example of how to install from `main` with pip:

```bash
# Ensure you have PyTorch installed (PyTorch 2.6.0+)
git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn]'

# Install the latest mistral-common from source
pip3 uninstall mistral-common
pip3 install git+https://github.com/mistralai/mistral-common.git@039465d
```

2. Run the finetuning example:

```bash
axolotl train examples/devstral/devstral-small-qlora.yml
```

This config uses about 21GB of VRAM.

Let us know how it goes. Happy finetuning! 🚀

### TIPS

- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config.
- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template), and sketched below.
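For illustration, a single record in that format might look like the following (shown as a Python dict; `messages` is the assumed default column name, and field names can be remapped in the dataset config):

```python
# One hypothetical training example in the OpenAI Messages format used by `type: chat_template`.
record = {
    "messages": [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a function that reverses a string."},
        {"role": "assistant", "content": "def reverse(s):\n    return s[::-1]"},
    ]
}
```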
## Optimization Guides

- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
- [Cut Cross Entropy](https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy)
- [Liger Kernel](https://docs.axolotl.ai/docs/custom_integrations.html#liger-kernels)

## Limitations

At the moment we only support the `mistral-common` tokenizer for supervised fine-tuning, and only with `type: chat_template`.

In addition, we do not support overriding tokens yet.

## Related Resources

- [MistralAI Devstral Blog](https://mistral.ai/news/devstral)
- [Axolotl Docs](https://docs.axolotl.ai)
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
- [Axolotl Website](https://axolotl.ai)
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)

## Future Work

- Add parity for Preference Tuning, RL, Multi-modal, etc.
- Add parity for other tokenizer configs like overriding tokens.
64 examples/devstral/devstral-small-qlora.yml (Normal file)

@@ -0,0 +1,64 @@

base_model: mistralai/Devstral-Small-2505

# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name

# Enable to use mistral-common tokenizer
tokenizer_use_mistral_common: true

load_in_8bit: false
load_in_4bit: true

plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin

datasets:
  - path: fozziethebeat/alpaca_messages_2k_test
    type: chat_template

dataset_prepared_path: last_run_prepared
val_set_size: 0.1
output_dir: ./outputs/qlora-out

adapter: qlora
lora_model_dir:

sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0
lora_target_linear: true

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
optimizer: adamw_torch
lr_scheduler: cosine
learning_rate: 0.0002

bf16: auto
tf32: false

gradient_checkpointing: true
resume_from_checkpoint:
logging_steps: 1
flash_attention: true

loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_ratio: 0.05
evals_per_epoch: 4
saves_per_epoch: 1

weight_decay: 0.0
special_tokens:
@@ -18,16 +18,10 @@ git clone https://github.com/axolotl-ai-cloud/axolotl.git
cd axolotl

pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
pip3 install --no-build-isolation -e '.[flash-attn,mistral]'
pip3 install --no-build-isolation -e '.[flash-attn]'
```

2. Download the example config:

```bash
axolotl fetch examples
```

3. Run the finetuning example:
2. Run the finetuning example:

```bash
axolotl train examples/magistral/magistral-small-qlora.yaml

@@ -42,7 +36,7 @@ Let us know how it goes. Happy finetuning! 🚀
- For inference, the official MistralAI team recommends `top_p: 0.95` and `temperature: 0.7` with `max_tokens: 40960`.
- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config.
- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
- The dataset format is the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).

## Optimization Guides

@@ -54,7 +48,7 @@ Let us know how it goes. Happy finetuning! 🚀

We only support the `mistral-common` tokenizer for Supervised Fine-tuning at the moment and for `type: chat_template` only.

The tokenizer does not work with `dataset.map` with multiprocessing, so we had to disable it. In addition, we do not support overriding tokens yet.
In addition, we do not support overriding tokens yet.

## Related Resources
@@ -13,7 +13,7 @@ packaging==23.2
|
||||
|
||||
huggingface_hub==0.32.2
|
||||
peft==0.15.2
|
||||
transformers==4.52.4
|
||||
transformers==4.53.1
|
||||
tokenizers>=0.21.1
|
||||
accelerate==1.8.1
|
||||
datasets==3.6.0
|
||||
|
||||
@@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else ""
|
||||
|
||||
print(
|
||||
UNINSTALL_PREFIX
|
||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@78b2a45713a54c9bedf8b33f5e31cf07a1a57154"'
|
||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@865b899"'
|
||||
)
|
||||
|
||||
9 setup.py
@@ -66,8 +66,11 @@ def parse_requirements(extras_require_map):
|
||||
|
||||
if (major, minor) >= (2, 7):
|
||||
_install_requires.pop(_install_requires.index(xformers_version))
|
||||
# _install_requires.append("xformers==0.0.29.post3") # xformers seems to be hard pinned to 2.6.0
|
||||
extras_require_map["vllm"] = ["vllm==0.8.5.post1"]
|
||||
if patch == 0:
|
||||
_install_requires.append("xformers==0.0.30")
|
||||
else:
|
||||
_install_requires.append("xformers==0.0.31.post1")
|
||||
extras_require_map["vllm"] = ["vllm>=0.9.0"]
|
||||
elif (major, minor) >= (2, 6):
|
||||
_install_requires.pop(_install_requires.index(xformers_version))
|
||||
_install_requires.append(
|
||||
@@ -114,7 +117,7 @@ extras_require = {
|
||||
"flash-attn": ["flash-attn==2.8.0.post2"],
|
||||
"ring-flash-attn": [
|
||||
"flash-attn==2.8.0.post2",
|
||||
"ring-flash-attn>=0.1.4",
|
||||
"ring-flash-attn>=0.1.5",
|
||||
"yunchang==0.6.0",
|
||||
],
|
||||
"deepspeed": [
|
||||
|
||||
@@ -4,4 +4,4 @@ import pkgutil
|
||||
|
||||
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
||||
|
||||
__version__ = "0.11.0.dev"
|
||||
__version__ = "0.11.0"
|
||||
|
||||
@@ -35,6 +35,12 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
    check_accelerate_default_config()
    check_user_token()

    for key in ["skip_prepare_dataset", "pretraining_dataset"]:
        if cfg.get(key):
            raise ValueError(
                f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead."
            )

    if not cfg.dataset_prepared_path:
        msg = (
            Fore.RED
0 src/axolotl/core/attention/__init__.py (Normal file)
162 src/axolotl/core/attention/flex_block_mask.py (Normal file)
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
monkeypatch for flex + packing
|
||||
"""
|
||||
|
||||
import sys
|
||||
from typing import Callable, Optional, Union
|
||||
|
||||
import torch
|
||||
from torch.nn.attention.flex_attention import BlockMask
|
||||
from transformers import Cache, PretrainedConfig
|
||||
from transformers.masking_utils import (
|
||||
ALL_MASK_ATTENTION_FUNCTIONS,
|
||||
_preprocess_mask_arguments,
|
||||
and_masks,
|
||||
causal_mask_function,
|
||||
or_masks,
|
||||
)
|
||||
from transformers.utils import is_torch_greater_or_equal
|
||||
|
||||
_is_torch_greater_or_equal_than_2_6 = is_torch_greater_or_equal("2.6", accept_dev=True)
|
||||
|
||||
|
||||
def create_causal_mask(
|
||||
config: PretrainedConfig,
|
||||
input_embeds: torch.Tensor,
|
||||
attention_mask: torch.Tensor,
|
||||
cache_position: torch.Tensor,
|
||||
past_key_values: Optional[Cache],
|
||||
or_mask_function: Optional[Callable] = None,
|
||||
and_mask_function: Optional[Callable] = None,
|
||||
) -> Optional[Union[torch.Tensor, BlockMask]]:
|
||||
"""
|
||||
Create a standard causal mask based on the attention implementation used (stored in the config). If `past_key_values`
|
||||
has an HybridCache structure, this function will return the mask corresponding to one of the "full_attention" layers (to align
|
||||
to what is needed in the `modeling_xxx.py` files).
|
||||
|
||||
Args:
|
||||
config (`PretrainedConfig`):
|
||||
The model config.
|
||||
input_embeds (`torch.Tensor`):
|
||||
The input embeddings of shape (batch_size, query_length, hidden_dim). This is used only to infer the
|
||||
batch size, query length and dtype.
|
||||
attention_mask (`torch.Tensor`, optional):
|
||||
The 2D attention mask corresponding to padded tokens of shape (batch_size, number_of_seen_tokens+q_length).
|
||||
It can also be an already prepared 4D mask, in which case it is returned as-is.
|
||||
cache_position (`torch.Tensor`):
|
||||
A tensor of shape (query_length,) indicating the current indices of the input sequence elements.
|
||||
past_key_values (`Cache`, optional):
|
||||
The past key values, if we use a cache.
|
||||
or_mask_function (`Callable`, optional):
|
||||
An optional mask function to combine with the causal mask function (by doing the union of both). This is
|
||||
useful to easily overlay another mask on top of the causal one, for example for image tokens handling.
|
||||
and_mask_function (`Callable`, optional):
|
||||
An optional mask function to combine with the causal mask function (by doing the intersection of both). This is
|
||||
useful to easily overlay another mask on top of the causal one, for example for image tokens handling.
|
||||
"""
|
||||
# If we have an HybridCache structure, here we want to create the mask for the full layers
|
||||
if (
|
||||
past_key_values
|
||||
and hasattr(past_key_values, "is_sliding")
|
||||
and False in past_key_values.is_sliding
|
||||
):
|
||||
layer_idx = past_key_values.is_sliding.index(False)
|
||||
else:
|
||||
layer_idx = 0
|
||||
|
||||
original_attention_mask = (
|
||||
None
|
||||
if attention_mask is None
|
||||
else attention_mask.clone().to(cache_position.device)
|
||||
)
|
||||
early_exit, attention_mask, kv_length, kv_offset = _preprocess_mask_arguments(
|
||||
config, input_embeds, attention_mask, cache_position, past_key_values, layer_idx
|
||||
)
|
||||
if early_exit:
|
||||
return attention_mask
|
||||
|
||||
batch_size, total_seq_len = cache_position.shape
|
||||
key_length = total_seq_len
|
||||
document_ids = torch.nn.functional.pad(
|
||||
original_attention_mask, value=0, pad=(0, key_length)
|
||||
)
|
||||
|
||||
batch_size, dtype = input_embeds.shape[0], input_embeds.dtype
|
||||
if attention_mask is not None:
|
||||
|
||||
def causal_doc_mask_mod(
|
||||
batch_idx, head_idx, q_idx, kv_idx
|
||||
): # pylint: disable=unused-argument
|
||||
"""
|
||||
Defines the logic of a block causal mask by combining both a standard causal mask
|
||||
and a block diagonal document mask.
|
||||
See :func:`~torchtune.modules.attention_utils.create_block_causal_mask`
|
||||
for an illustration.
|
||||
"""
|
||||
causal_mask_ = q_idx >= kv_idx # not valid when decoding
|
||||
document_mask = (
|
||||
document_ids[batch_idx, q_idx] == document_ids[batch_idx, kv_idx]
|
||||
)
|
||||
final_mask = causal_mask_ & document_mask
|
||||
return final_mask
|
||||
|
||||
mask_factory_function = causal_doc_mask_mod
|
||||
else:
|
||||
mask_factory_function = causal_mask_function
|
||||
mask_interface = ALL_MASK_ATTENTION_FUNCTIONS[
|
||||
config._attn_implementation # pylint: disable=protected-access
|
||||
]
|
||||
|
||||
# Do not allow skip if we are compiling (this is to match BC)
|
||||
allow_is_causal_skip = (
|
||||
not past_key_values.is_compileable if past_key_values is not None else True
|
||||
)
|
||||
|
||||
# Allow slight deviations from causal mask
|
||||
if or_mask_function is not None:
|
||||
if not _is_torch_greater_or_equal_than_2_6:
|
||||
raise ValueError(
|
||||
"Using `or_mask_function` or `and_mask_function` arguments require torch>=2.6"
|
||||
)
|
||||
mask_factory_function = or_masks(mask_factory_function, or_mask_function)
|
||||
allow_is_causal_skip = False
|
||||
if and_mask_function is not None:
|
||||
if not _is_torch_greater_or_equal_than_2_6:
|
||||
raise ValueError(
|
||||
"Using `or_mask_function` or `and_mask_function` arguments require torch>=2.6"
|
||||
)
|
||||
mask_factory_function = and_masks(mask_factory_function, and_mask_function)
|
||||
allow_is_causal_skip = False
|
||||
|
||||
# We now create the mask
|
||||
causal_mask = mask_interface(
|
||||
batch_size=batch_size,
|
||||
cache_position=cache_position,
|
||||
kv_length=kv_length,
|
||||
kv_offset=kv_offset,
|
||||
mask_function=mask_factory_function,
|
||||
attention_mask=attention_mask,
|
||||
allow_is_causal_skip=allow_is_causal_skip, # additional kwarg for sdpa
|
||||
dtype=dtype, # Additional kwarg for eager
|
||||
config=config, # Pass the config as well, in case someone wants to easily have their own mask_interface
|
||||
)
|
||||
return causal_mask
|
||||
|
||||
|
||||
def patch_create_causal_mask(model_type):
|
||||
import transformers.masking_utils
|
||||
|
||||
transformers.masking_utils.create_causal_mask = create_causal_mask
|
||||
|
||||
if model_type:
|
||||
try:
|
||||
# Dynamically import the module and attention class
|
||||
module_path = f"transformers.models.{model_type}.modeling_{model_type}"
|
||||
module = __import__(module_path)
|
||||
module.create_causal_mask = create_causal_mask
|
||||
del sys.modules[module_path]
|
||||
except (ImportError, AttributeError) as e:
|
||||
raise ValueError(
|
||||
f"Could not import attention class for model_type: {model_type}. "
|
||||
f"Error: {str(e)}"
|
||||
) from e
|
||||
@@ -219,7 +219,9 @@ class TrainerBuilderBase(abc.ABC):
|
||||
if self.cfg.bf16 == "full":
|
||||
training_args_kwargs["bf16_full_eval"] = True
|
||||
else:
|
||||
training_args_kwargs["bf16"] = self.cfg.bf16 or self.cfg.bfloat16
|
||||
bf16 = self.cfg.bf16 or self.cfg.bfloat16
|
||||
bf16 = bf16 if bf16 is not None else False
|
||||
training_args_kwargs["bf16"] = bf16
|
||||
|
||||
def _configure_scheduler(self, training_args_kwargs: dict):
|
||||
if self.cfg.lr_scheduler in ["one_cycle", "rex"]:
|
||||
|
||||
@@ -245,10 +245,19 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
||||
training_arguments_kwargs["curriculum_sampling"] = self.cfg.curriculum_sampling
|
||||
|
||||
training_arguments_kwargs["sample_packing"] = bool(self.cfg.sample_packing)
|
||||
training_arguments_kwargs["sample_packing_drop_attention_mask"] = bool(
|
||||
self.cfg.flash_attention
|
||||
or self.cfg.xformers_attention
|
||||
or self.cfg.flex_attention
|
||||
)
|
||||
training_arguments_kwargs["multipack_real_batches"] = (
|
||||
self.cfg.multipack_real_batches
|
||||
if self.cfg.multipack_real_batches is not None
|
||||
else not self.cfg.flash_attention
|
||||
else not (
|
||||
self.cfg.flash_attention
|
||||
or self.cfg.flex_attention
|
||||
or self.cfg.xformers_attention
|
||||
)
|
||||
)
|
||||
training_arguments_kwargs["eval_sample_packing"] = bool(
|
||||
self.cfg.eval_sample_packing
|
||||
|
||||
@@ -27,6 +27,7 @@ from typing_extensions import override
|
||||
from axolotl.core.trainers.mixins import (
|
||||
CheckpointSaveMixin,
|
||||
OptimizerMixin,
|
||||
PackingMixin,
|
||||
RngLoaderMixin,
|
||||
SchedulerMixin,
|
||||
)
|
||||
@@ -42,7 +43,12 @@ LOG = get_logger(__name__)
|
||||
|
||||
|
||||
class AxolotlTrainer(
|
||||
SchedulerMixin, OptimizerMixin, RngLoaderMixin, CheckpointSaveMixin, Trainer
|
||||
PackingMixin,
|
||||
SchedulerMixin,
|
||||
OptimizerMixin,
|
||||
RngLoaderMixin,
|
||||
CheckpointSaveMixin,
|
||||
Trainer,
|
||||
):
|
||||
"""Extend the base Trainer for axolotl helpers"""
|
||||
|
||||
@@ -206,6 +212,14 @@ class AxolotlTrainer(
|
||||
|
||||
if dataset.column_names and "length" in dataset.column_names:
|
||||
dataset = dataset.remove_columns(["length"])
|
||||
if (
|
||||
dataset.column_names
|
||||
and "position_ids" in dataset.column_names
|
||||
and "attention_mask" in dataset.column_names
|
||||
and self.args.sample_packing
|
||||
and self.args.sample_packing_drop_attention_mask
|
||||
):
|
||||
dataset = dataset.remove_columns(["attention_mask"])
|
||||
|
||||
if isinstance(dataset, datasets.Dataset):
|
||||
if is_training:
|
||||
|
||||
@@ -5,5 +5,6 @@
|
||||
|
||||
from .checkpoints import CheckpointSaveMixin
|
||||
from .optimizer import OptimizerMixin
|
||||
from .packing import PackingMixin
|
||||
from .rng_state_loader import RngLoaderMixin
|
||||
from .scheduler import SchedulerMixin
|
||||
|
||||
20 src/axolotl/core/trainers/mixins/packing.py (Normal file)
@@ -0,0 +1,20 @@
|
||||
"""Trainer mixin to support packing"""
|
||||
|
||||
from transformers import Trainer
|
||||
|
||||
|
||||
class PackingMixin(Trainer):
|
||||
"""
|
||||
Trainer mixin to support packing
|
||||
"""
|
||||
|
||||
def _set_signature_columns_if_needed(self):
|
||||
super()._set_signature_columns_if_needed()
|
||||
if (
|
||||
self._signature_columns
|
||||
and self.args.sample_packing
|
||||
and self.args.sample_packing_drop_attention_mask
|
||||
):
|
||||
set_sig_columns = set(self._signature_columns)
|
||||
set_sig_columns.remove("attention_mask")
|
||||
self._signature_columns = list(set_sig_columns)
|
||||
@@ -42,6 +42,10 @@ class AxolotlTrainingMixins:
|
||||
default=None,
|
||||
metadata={"help": "The multiprocessing start method to use."},
|
||||
)
|
||||
sample_packing_drop_attention_mask: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Drop attention mask from inputs when using packing."},
|
||||
)
|
||||
multipack_real_batches: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Use real batches for efficient training."},
|
||||
|
||||
@@ -48,13 +48,6 @@ class TokenizedPromptDataset(Dataset):
|
||||
features = dataset.features.keys()
|
||||
num_proc = min(64, self.process_count if self.process_count else os.cpu_count())
|
||||
|
||||
# Disable multiprocessing if the tokenizer doesn't support it (e.g., mistral_common)
|
||||
if not getattr(self.prompt_tokenizer, "supports_multiprocessing", True):
|
||||
LOG.info(
|
||||
"Disabling multiprocessing for tokenizer as it doesn't support it (e.g., mistral_common)"
|
||||
)
|
||||
num_proc = 1
|
||||
|
||||
map_kwargs = {}
|
||||
if self.prompt_tokenizer.supports_batched:
|
||||
map_kwargs["batched"] = True
|
||||
|
||||
@@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
- If you are installing from pip
|
||||
```bash
|
||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@78b2a45713a54c9bedf8b33f5e31cf07a1a57154"
|
||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@865b899"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -32,7 +32,7 @@ LOG = get_logger(__name__)
|
||||
|
||||
_CCE_INSTALL_MESSAGE = (
|
||||
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@7f6afce"`'
|
||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@865b899"`'
|
||||
)
|
||||
|
||||
|
||||
|
||||
12 src/axolotl/integrations/densemixer/README.md (Normal file)

@@ -0,0 +1,12 @@

# DenseMixer

See [DenseMixer](https://github.com/yaof20/DenseMixer/)

# Usage

Simply add the following to your axolotl YAML config:

```yaml
plugins:
  - axolotl.integrations.densemixer.DenseMixerPlugin
```

5 src/axolotl/integrations/densemixer/__init__.py (Normal file)

@@ -0,0 +1,5 @@

"""Integration entry point for the DenseMixer plugin."""

from .plugin import DenseMixerPlugin

__all__ = ["DenseMixerPlugin"]

11 src/axolotl/integrations/densemixer/args.py (Normal file)

@@ -0,0 +1,11 @@

"""Pydantic models for DenseMixer plugin"""

from pydantic import BaseModel


class DenseMixerArgs(BaseModel):
    """
    Args for DenseMixer
    """

    dense_mixer: bool = True

42 src/axolotl/integrations/densemixer/plugin.py (Normal file)

@@ -0,0 +1,42 @@

"""DenseMixer plugin for Axolotl"""

import importlib

from axolotl.integrations.base import BasePlugin
from axolotl.utils.logging import get_logger

LOG = get_logger(__name__)


class DenseMixerPlugin(BasePlugin):
    """
    Plugin for DenseMixer
    """

    def get_input_args(self) -> str | None:
        return "axolotl.integrations.densemixer.args.DenseMixerArgs"

    def pre_model_load(self, cfg):
        """Apply densemixer patches before model loading if enabled."""
        if cfg.dense_mixer:
            if not importlib.util.find_spec("densemixer"):
                raise RuntimeError(
                    "DenseMixer is not installed. Install it with `pip install densemixer`"
                )

            from densemixer.patching import (
                apply_olmoe_patch,
                apply_qwen2_moe_patch,
                apply_qwen3_moe_patch,
            )

            LOG.info(
                f"Applying DenseMixer patches for model type: {cfg.model_config_type}"
            )

            if cfg.model_config_type == "olmoe":
                apply_olmoe_patch()
            if cfg.model_config_type == "qwen2_moe":
                apply_qwen2_moe_patch()
            if cfg.model_config_type == "qwen3_moe":
                apply_qwen3_moe_patch()
@@ -11,7 +11,7 @@ kd_ce_alpha: 0.1
|
||||
kd_alpha: 0.9
|
||||
kd_temperature: 1.0
|
||||
|
||||
torch_compile: True # torch>=2.5.1, recommended to reduce vram
|
||||
torch_compile: True # torch>=2.6.0, recommended to reduce vram
|
||||
|
||||
datasets:
|
||||
- path: ...
|
||||
|
||||
@@ -7,6 +7,7 @@ import importlib.util
|
||||
from functools import cached_property
|
||||
|
||||
import addict
|
||||
import torch
|
||||
import transformers
|
||||
from transformers import PretrainedConfig, PreTrainedModel
|
||||
|
||||
@@ -49,11 +50,11 @@ class PatchManager:
|
||||
|
||||
def apply_pre_model_load_patches(self):
|
||||
"""Apply pre-model load patches based on config."""
|
||||
# self._apply_flex_attention_patches()
|
||||
self._apply_flash_attention_patches()
|
||||
self._apply_chunked_cross_entropy_patch()
|
||||
self._apply_fsdp_patches()
|
||||
self._apply_adapter_patches()
|
||||
self._apply_flex_attention_patches()
|
||||
self._apply_model_specific_patches()
|
||||
self._apply_fp8_patches()
|
||||
self._apply_flash_attention_peft_patches()
|
||||
@@ -66,6 +67,7 @@ class PatchManager:
|
||||
self._apply_self_attention_lora_patch()
|
||||
self._apply_gemma3_conditional_generation_forward_patch()
|
||||
self._apply_sequence_parallel_patches()
|
||||
self._apply_tiled_mlp(self.cfg.model_config_type)
|
||||
|
||||
def apply_post_model_load_patches(self, model: PreTrainedModel):
|
||||
"""Apply patches that require the model instance."""
|
||||
@@ -97,6 +99,14 @@ class PatchManager:
|
||||
|
||||
patch_accelerate_fsdp2()
|
||||
|
||||
# if self.cfg.fsdp_config:
|
||||
# # see transformers#39152
|
||||
# from axolotl.monkeypatch.trainer_fsdp_optim import (
|
||||
# patch_training_loop_for_fsdp,
|
||||
# )
|
||||
#
|
||||
# patch_training_loop_for_fsdp()
|
||||
|
||||
def _apply_adapter_patches(self):
|
||||
"""Apply patches for adapter configurations."""
|
||||
if self.cfg.adapter and self.cfg.embeddings_skip_upcast:
|
||||
@@ -107,14 +117,20 @@ class PatchManager:
|
||||
def _apply_flex_attention_patches(self):
|
||||
"""Apply patches for flexible attention."""
|
||||
if self.cfg.flex_attention:
|
||||
from axolotl.monkeypatch.attention.flex_attn import (
|
||||
patch_flex_make_mask,
|
||||
patch_flex_wrapper,
|
||||
)
|
||||
# from axolotl.monkeypatch.attention.flex_attn import (
|
||||
# patch_flex_make_mask,
|
||||
# patch_flex_wrapper,
|
||||
# )
|
||||
#
|
||||
# flex_attn_compile_kwargs = self.cfg.flex_attn_compile_kwargs or {}
|
||||
# patch_flex_wrapper(**flex_attn_compile_kwargs)
|
||||
# patch_flex_make_mask()
|
||||
if self.cfg.sample_packing:
|
||||
from axolotl.core.attention.flex_block_mask import (
|
||||
patch_create_causal_mask,
|
||||
)
|
||||
|
||||
flex_attn_compile_kwargs = self.cfg.flex_attn_compile_kwargs or {}
|
||||
patch_flex_wrapper(**flex_attn_compile_kwargs)
|
||||
patch_flex_make_mask()
|
||||
patch_create_causal_mask(self.cfg.model_config_type)
|
||||
|
||||
def _apply_model_specific_patches(self):
|
||||
"""Apply patches specific to model architectures."""
|
||||
@@ -150,10 +166,25 @@ class PatchManager:
|
||||
"""Apply patches for gradient checkpointing."""
|
||||
if self.cfg.gradient_checkpointing in ["unsloth", "offload"]:
|
||||
from axolotl.monkeypatch.gradient_checkpointing import (
|
||||
CheckpointFunctionWithCPUOffload,
|
||||
hf_grad_checkpoint_offload_wrapper,
|
||||
)
|
||||
|
||||
transformers.modeling_utils.checkpoint = hf_grad_checkpoint_offload_wrapper
|
||||
if (
|
||||
self.cfg.gradient_checkpointing_kwargs
|
||||
and "use_reentrant" in self.cfg.gradient_checkpointing_kwargs
|
||||
and self.cfg.gradient_checkpointing_kwargs["use_reentrant"] is False
|
||||
):
|
||||
transformers.modeling_utils.checkpoint = (
|
||||
hf_grad_checkpoint_offload_wrapper
|
||||
)
|
||||
else:
|
||||
transformers.modeling_utils.checkpoint.CheckpointFunction = (
|
||||
CheckpointFunctionWithCPUOffload
|
||||
)
|
||||
torch.utils.checkpoint.CheckpointFunction = (
|
||||
CheckpointFunctionWithCPUOffload
|
||||
)
|
||||
if self.cfg.gradient_checkpointing == "offload_disk":
|
||||
from axolotl.monkeypatch.gradient_checkpointing import (
|
||||
hf_grad_checkpoint_disk_offload_wrapper,
|
||||
@@ -243,6 +274,12 @@ class PatchManager:
|
||||
patch_prepare_data_loader()
|
||||
patch_prepare_device_mesh(self.cfg.sequence_parallel_degree, self.cfg.fsdp)
|
||||
|
||||
def _apply_tiled_mlp(self, model_type: str):
|
||||
if self.cfg.tiled_mlp:
|
||||
from axolotl.monkeypatch.tiled_mlp import patch_tiled_mlp
|
||||
|
||||
patch_tiled_mlp(model_type, cfg_num_shards=self.cfg.tiled_mlp_num_shards)
|
||||
|
||||
def _patch_attention(self):
|
||||
"""Apply attention-specific patches based on model type."""
|
||||
if not (self.cfg.flash_attention and hasattr(self.model_config, "model_type")):
|
||||
|
||||
@@ -5,7 +5,8 @@ from functools import partial
|
||||
|
||||
from packaging import version
|
||||
|
||||
from axolotl.monkeypatch.gradient_checkpointing.offload_cpu import (
|
||||
from axolotl.monkeypatch.gradient_checkpointing.offload_cpu import ( # noqa: F401
|
||||
CheckpointFunctionWithCPUOffload,
|
||||
CPU_Offloaded_Gradient_Checkpointer,
|
||||
)
|
||||
from axolotl.monkeypatch.gradient_checkpointing.offload_disk import (
|
||||
|
||||
@@ -13,8 +13,24 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import contextlib
|
||||
import inspect
|
||||
|
||||
import torch
|
||||
from packaging import version
|
||||
from torch.utils.checkpoint import (
|
||||
_get_autocast_kwargs,
|
||||
_get_device_module,
|
||||
_infer_device_type,
|
||||
check_backward_validity,
|
||||
detach_variable,
|
||||
get_device_states,
|
||||
set_device_states,
|
||||
)
|
||||
|
||||
# support different pytorch versions
|
||||
has_device_type = "device_type" in inspect.signature(set_device_states).parameters
|
||||
|
||||
torch_version = version.parse(torch.__version__)
|
||||
|
||||
@@ -60,3 +76,153 @@ class CPU_Offloaded_Gradient_Checkpointer( # pylint: disable=invalid-name
|
||||
) + (
|
||||
None,
|
||||
) * len(ctx.args)
|
||||
|
||||
|
||||
# Copyright 2025 Snowflake Inc.
# SPDX-License-Identifier: Apache-2.0
# https://github.com/snowflakedb/ArcticTraining/blob/main/arctic_training/monkey_patches.py
class CheckpointFunctionWithCPUOffload(torch.autograd.Function):
    """
    This is a torch/utils/checkpoint.py CheckpointFunction monkey patch that offloads the first tensor to cpu during forward and back to cuda during backward. This allows significant memory savings when using a very long seqlen. e.g. for llama 8b at 100k it's 24GB saved per gpu: `((100_000*4096)*2*32/2**30)`
    In the case of a very long seqlen 100k+ the copying to/from cpu overhead is not big, because dense quadratic attention compute will dominate.
    """
|
||||
|
||||
@staticmethod
|
||||
def forward(ctx, run_function, preserve_rng_state, *args):
|
||||
check_backward_validity(args)
|
||||
ctx.run_function = run_function
|
||||
ctx.preserve_rng_state = preserve_rng_state
|
||||
# Accommodates the (remote) possibility that autocast is enabled for cpu AND gpu.
|
||||
ctx.device_type = _infer_device_type(*args)
|
||||
ctx.device_autocast_kwargs, ctx.cpu_autocast_kwargs = _get_autocast_kwargs(
|
||||
ctx.device_type
|
||||
)
|
||||
if preserve_rng_state:
|
||||
ctx.fwd_cpu_state = torch.get_rng_state()
|
||||
# Don't eagerly initialize the cuda context by accident.
|
||||
# (If the user intends that the context is initialized later, within their
|
||||
# run_function, we SHOULD actually stash the cuda state here. Unfortunately,
|
||||
# we have no way to anticipate this will happen before we run the function.)
|
||||
ctx.had_device_in_fwd = False
|
||||
device_module = _get_device_module(ctx.device_type)
|
||||
if getattr(device_module, "_initialized", False):
|
||||
ctx.had_device_in_fwd = True
|
||||
ctx.fwd_devices, ctx.fwd_device_states = get_device_states(*args)
|
||||
|
||||
# Save non-tensor inputs in ctx, keep a placeholder None for tensors
|
||||
# to be filled out during the backward.
|
||||
ctx.inputs = []
|
||||
ctx.tensor_indices = []
|
||||
tensor_inputs = []
|
||||
# x = None
|
||||
for i, arg in enumerate(args):
|
||||
if torch.is_tensor(arg):
|
||||
# cpu-offload
|
||||
# we don't want the 2nd tensor - usually it's a shared 4D attn mask which is huge [seq,seq]
|
||||
# upstream could accept a list of arg indices to offload
|
||||
if i == 0:
|
||||
# print(f"{arg.shape=}")
|
||||
ctx.x_device = arg.device
|
||||
ctx.x_requires_grad = arg.requires_grad
|
||||
t = arg.detach().cpu()
|
||||
else:
|
||||
t = arg
|
||||
tensor_inputs.append(t)
|
||||
ctx.tensor_indices.append(i)
|
||||
ctx.inputs.append(None)
|
||||
else:
|
||||
ctx.inputs.append(arg)
|
||||
|
||||
ctx.save_for_backward(*tensor_inputs)
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = run_function(*args)
|
||||
|
||||
return outputs
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, *args):
|
||||
if (
|
||||
not torch.autograd._is_checkpoint_valid() # pylint: disable=protected-access
|
||||
):
|
||||
raise RuntimeError(
|
||||
"When use_reentrant=True, torch.utils.checkpoint is incompatible"
|
||||
" with .grad() or passing an `inputs` parameter to .backward()."
|
||||
" To resolve this error, you can either set use_reentrant=False,"
|
||||
" or call .backward() without passing the `inputs` argument."
|
||||
)
|
||||
# Copy the list to avoid modifying original list.
|
||||
inputs = list(ctx.inputs)
|
||||
tensor_indices = ctx.tensor_indices
|
||||
tensors = ctx.saved_tensors
|
||||
|
||||
# Fill in inputs with appropriate saved tensors.
|
||||
for i, idx in enumerate(tensor_indices):
|
||||
if i == 0:
|
||||
t = (
|
||||
tensors[i]
|
||||
.to(ctx.x_device)
|
||||
.detach()
|
||||
.requires_grad_(ctx.x_requires_grad)
|
||||
)
|
||||
else:
|
||||
t = tensors[i]
|
||||
inputs[idx] = t
|
||||
|
||||
# Stash the surrounding rng state, and mimic the state that was
|
||||
# present at this time during forward. Restore the surrounding state
|
||||
# when we're done.
|
||||
rng_devices = []
|
||||
if ctx.preserve_rng_state and ctx.had_device_in_fwd:
|
||||
rng_devices = ctx.fwd_devices
|
||||
with torch.random.fork_rng(
|
||||
devices=rng_devices,
|
||||
enabled=ctx.preserve_rng_state,
|
||||
device_type=ctx.device_type,
|
||||
):
|
||||
if ctx.preserve_rng_state:
|
||||
torch.set_rng_state(ctx.fwd_cpu_state)
|
||||
if ctx.had_device_in_fwd:
|
||||
if has_device_type:
|
||||
# newer pytorch (as early as 2.7)
|
||||
set_device_states(
|
||||
ctx.fwd_devices,
|
||||
ctx.fwd_device_states,
|
||||
device_type=ctx.device_type,
|
||||
)
|
||||
else:
|
||||
# older pytorch (at least 2.4)
|
||||
set_device_states(ctx.fwd_devices, ctx.fwd_device_states)
|
||||
detached_inputs = detach_variable(tuple(inputs))
|
||||
|
||||
device_autocast_ctx = (
|
||||
torch.amp.autocast(
|
||||
device_type=ctx.device_type, **ctx.device_autocast_kwargs
|
||||
)
|
||||
if torch.amp.is_autocast_available(ctx.device_type)
|
||||
else contextlib.nullcontext()
|
||||
)
|
||||
with torch.enable_grad(), device_autocast_ctx, torch.amp.autocast("cpu", **ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
|
||||
outputs = ctx.run_function(*detached_inputs)
|
||||
|
||||
if isinstance(outputs, torch.Tensor):
|
||||
outputs = (outputs,)
|
||||
|
||||
# run backward() with only tensor that requires grad
|
||||
outputs_with_grad = []
|
||||
args_with_grad = []
|
||||
for i in range(len(outputs)): # pylint: disable=consider-using-enumerate
|
||||
if torch.is_tensor(outputs[i]) and outputs[i].requires_grad:
|
||||
outputs_with_grad.append(outputs[i])
|
||||
args_with_grad.append(args[i])
|
||||
if len(outputs_with_grad) == 0:
|
||||
raise RuntimeError(
|
||||
"none of output has requires_grad=True, this checkpoint() is not necessary"
|
||||
)
|
||||
torch.autograd.backward(outputs_with_grad, args_with_grad)
|
||||
grads = tuple(
|
||||
inp.grad if isinstance(inp, torch.Tensor) else None
|
||||
for inp in detached_inputs
|
||||
)
|
||||
|
||||
return (None, None) + grads
|
||||
|
||||
@@ -35,6 +35,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
|
||||
"deepseek_v3",
|
||||
"glm",
|
||||
"glm4",
|
||||
"smollm3",
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ RING_ATTN_FUNC_MAPPING = {
}


def create_flash_attn_forward(
def create_flash_attn_forward_varlen_llama3(
    process_group: dist.ProcessGroup, ring_attn_func: RingAttnFunc
) -> Callable:
    """
@@ -71,6 +71,7 @@ def create_flash_attn_forward(
        max_length_q: int | None = None,
        max_length_k: int | None = None,
        target_dtype: torch.dtype | None = None,
        attn_implementation: str | None = None,
        **kwargs,
    ):
        """
@@ -97,6 +98,7 @@ def create_flash_attn_forward(
            max_length_q: Not used in this implementation.
            max_length_k: Not used in this implementation.
            target_dtype: Not used in this implementation.
            attn_implementation: Not used in this implementation.
            **kwargs: Additional keyword arguments. Not used in this implementation.

        Returns:
@@ -161,7 +163,7 @@ def substitute_hf_flash_attn(
    old_flash_attention_forward = (
        transformers.modeling_flash_attention_utils._flash_attention_forward
    )
    new_flash_attention_forward = create_flash_attn_forward(
    new_flash_attention_forward = create_flash_attn_forward_varlen_llama3(
        process_group=process_group, ring_attn_func=ring_attn_func
    )

@@ -9,10 +9,13 @@ sequence parallelism training.
"""

import inspect
import os
from typing import Optional

import accelerate
import torch
import torch.distributed as dist
from transformers.modeling_flash_attention_utils import _flash_supports_window_size

from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
from axolotl.utils.logging import get_logger
@@ -62,6 +65,96 @@ def set_ring_attn_group(ring_attn_group: dist.ProcessGroup | None):
    RING_ATTN_GROUP = ring_attn_group


def create_ring_flash_attention_forward(
    process_group: dist.ProcessGroup, heads_k_stride: int
):
    from ring_flash_attn import llama3_flash_attn_varlen_func
    from ring_flash_attn.adapters.hf_adapter import DATA_PARAMS

    def _flash_attention_forward_v3(
        query_states: torch.Tensor,
        key_states: torch.Tensor,
        value_states: torch.Tensor,
        attention_mask: torch.Tensor,  # pylint: disable=unused-argument
        query_length: int,
        is_causal: bool,
        dropout: float = 0.0,
        position_ids: Optional[torch.Tensor] = None,  # pylint: disable=unused-argument
        softmax_scale: Optional[float] = None,
        sliding_window: Optional[int] = None,
        use_top_left_mask: bool = False,
        softcap: Optional[float] = None,
        deterministic: bool = None,
        cu_seq_lens_q: Optional[
            torch.LongTensor
        ] = None,  # pylint: disable=unused-argument
        cu_seq_lens_k: Optional[
            torch.LongTensor
        ] = None,  # pylint: disable=unused-argument
        max_length_q: Optional[int] = None,  # pylint: disable=unused-argument
        max_length_k: Optional[int] = None,  # pylint: disable=unused-argument
        target_dtype: Optional[torch.dtype] = None,  # pylint: disable=unused-argument
        attn_implementation: Optional[str] = None,  # pylint: disable=unused-argument
        **kwargs,  # pylint: disable=unused-argument
    ):
        # pylint: disable=duplicate-code
        if not use_top_left_mask:
            causal = is_causal
        else:
            # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__.
            causal = is_causal and query_length != 1

        # Assuming 4D tensors, key_states.shape[1] is the key/value sequence length (source length).
        use_sliding_windows = (
            _flash_supports_window_size
            and sliding_window is not None
            and key_states.shape[1] > sliding_window
        )
        flash_kwargs = (
            {"window_size": (sliding_window, sliding_window)}
            if use_sliding_windows
            else {}
        )

        if deterministic is None:
            deterministic = os.environ.get("FLASH_ATTENTION_DETERMINISTIC", "0") == "1"
        flash_kwargs["deterministic"] = deterministic
        assert (
            softcap is None
        ), "llama3_flash_attn_varlen_func does not support softcap yet."
        # flash_kwargs["softcap"] = softcap
        flash_kwargs["group"] = process_group

        # not sure why attention_mask can be not None...
        assert causal, "only causal attention is supported yet."
        batch_size = query_states.size(0)
        assert batch_size == 1, "varlen data should be processed in advance."

        attn_output = llama3_flash_attn_varlen_func(
            query_states.squeeze(dim=0),
            key_states.squeeze(dim=0),
            value_states.squeeze(dim=0),
            cu_seqlens_q=DATA_PARAMS["cu_seqlens_q"],
            cu_seqlens_k=DATA_PARAMS["cu_seqlens_k"],
            max_seqlen_q=DATA_PARAMS["max_seqlen_q"],
            max_seqlen_k=DATA_PARAMS["max_seqlen_k"],
            heads_k_stride=heads_k_stride,
            local_k_slice=DATA_PARAMS["local_k_slice"],
            dropout_p=dropout,
            softmax_scale=softmax_scale,
            causal=causal,
            **flash_kwargs,
        )

        attn_output = attn_output.unsqueeze(dim=0)

        return attn_output

    return [
        _flash_attention_forward_v3,
    ]


def register_ring_attn(
    sequence_parallel_degree: int,
    heads_k_stride: int | None,
@@ -118,9 +211,20 @@ def register_ring_attn(
    LOG.info(f"Sequence parallel group assignments: {group_assignments}")

    if ring_attn_func is RingAttnFunc.VARLEN_LLAMA3:
        from ring_flash_attn import substitute_hf_flash_attn
        # fmt: off
        import ring_flash_attn.adapters.hf_adapter

        substitute_hf_flash_attn(
        from ring_flash_attn.adapters.hf_adapter import (  # isort: skip # pylint: disable=unused-import
            create_ring_flash_attention_forward as create_ring_flash_attention_forward_orig,
        )

        create_ring_flash_attention_forward_orig = (  # noqa: F811,F841
            create_ring_flash_attention_forward
        )
        ring_flash_attn.adapters.hf_adapter.create_ring_flash_attention_forward = create_ring_flash_attention_forward
        # fmt: on

        ring_flash_attn.adapters.hf_adapter.substitute_hf_flash_attn(
            process_group=get_ring_attn_group(), heads_k_stride=heads_k_stride or 1
        )
    elif ring_attn_func is RingAttnFunc.BATCH_RING:

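The hunk above rebinds ring_flash_attn.adapters.hf_adapter.create_ring_flash_attention_forward to the local implementation before calling the adapter's substitute_hf_flash_attn, so the adapter installs the llama3 varlen forward defined here. A runnable toy sketch of that rebind-then-delegate pattern (the module below is a stand-in, not the real adapter):

import types

# Stand-in for ring_flash_attn.adapters.hf_adapter: its substitute() uses
# whichever builder is bound on the module at call time.
adapter = types.ModuleType("toy_hf_adapter")
adapter.create_forward = lambda: "stock forward"
adapter.substitute = lambda: print("installing:", adapter.create_forward())

adapter.create_forward = lambda: "patched forward"  # rebind first
adapter.substitute()  # prints "installing: patched forward"
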
68
src/axolotl/monkeypatch/tiled_mlp.py
Normal file
68
src/axolotl/monkeypatch/tiled_mlp.py
Normal file
@@ -0,0 +1,68 @@
"""Monkeypatch for Tiled MLP implementation"""

import math
import os

import torch
import torch.distributed as dist


def patch_tiled_mlp(model_type, use_original_mlp=False, cfg_num_shards=None):
    from deepspeed.runtime.sequence_parallel.ulysses_sp import TiledMLP

    try:
        # Dynamically import the module and MLP class
        module_path = f"transformers.models.{model_type}.modeling_{model_type}"
        model_cls_prefix = "".join(
            [part.capitalize() for part in model_type.split("_")]
        )
        module = __import__(module_path, fromlist=[f"{model_cls_prefix}MLP"])
        mlp_cls = getattr(module, f"{model_cls_prefix}MLP")

        if use_original_mlp:
            mlp_forward = mlp_cls.forward
        else:

            def generic_mlp_forward(self_, hs):
                return self_.down_proj(
                    self_.act_fn(self_.gate_proj(hs)) * self_.up_proj(hs)
                )

            mlp_forward = torch.compile(generic_mlp_forward)

        is_distributed = int(os.environ.get("WORLD_SIZE", 1)) > 1

        def tiled_mlp_forward(self, x):
            input_shape = x.shape
            seqlen = input_shape[-2]
            hidden = input_shape[-1]
            if cfg_num_shards is None:
                num_shards = math.ceil(seqlen / hidden)
                if is_distributed:
                    num_shards_tensor = torch.tensor(num_shards, device=x.device)
                    dist.all_reduce(num_shards_tensor, op=dist.ReduceOp.MAX)
                    num_shards = num_shards_tensor.item()
            else:
                num_shards = cfg_num_shards

            compute_params = [
                self.down_proj.weight,
                self.gate_proj.weight,
                self.up_proj.weight,
            ]

            down_res = TiledMLP.apply(
                mlp_forward,
                self,
                x,
                num_shards,
                compute_params,
            )
            return down_res

        mlp_cls.forward = tiled_mlp_forward
    except (ImportError, AttributeError) as e:
        raise RuntimeError(
            f"Could not import MLP class for model_type: {model_type}. "
            f"Error: {str(e)}"
        ) from e
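A brief usage sketch for the patch above; the model_type value is illustrative, and DeepSpeed's TiledMLP must be importable for the patch to apply:

from axolotl.monkeypatch.tiled_mlp import patch_tiled_mlp

# Rebind e.g. LlamaMLP.forward to the sharded TiledMLP forward. With
# cfg_num_shards=None the shard count is ceil(seqlen / hidden_size),
# synced to the max across ranks when WORLD_SIZE > 1.
patch_tiled_mlp("llama", use_original_mlp=False, cfg_num_shards=None)
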
@@ -12,15 +12,13 @@ from axolotl.utils.logging import get_logger
LOG = get_logger(__name__)

ORIGINAL_TRAINER_CODE = """

        delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled

        if delay_optimizer_creation:
            self.optimizer = self.accelerator.prepare(self.optimizer)
"""

PATCHED_TRAINER_CODE = """

        delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled

        if delay_optimizer_creation:
            model = self.accelerator.prepare(self.model)
"""

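ORIGINAL_TRAINER_CODE / PATCHED_TRAINER_CODE above are a search-and-replace pair for a source-level trainer patch. A generic, illustrative helper for that kind of patching (not the exact Axolotl utility) might look like:

import inspect

def swap_source(obj, original: str, patched: str) -> str:
    """Fetch an object's source, confirm the expected snippet is still
    present, and return the source with the snippet replaced."""
    source = inspect.getsource(obj)
    if original not in source:
        raise ValueError("expected snippet not found; upstream code changed")
    return source.replace(original, patched)
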
@@ -681,13 +681,14 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
        for message in messages:
            transformed_message = self.transform_message(message)

            turn = {
                **transformed_message,
                "training": message.get(self.prompter.message_field_training),
                "training_detail": message.get(
                    self.prompter.message_field_training_detail
                ),
            }
            turn = transformed_message

            training = message.get(self.prompter.message_field_training)
            training_detail = message.get(self.prompter.message_field_training_detail)
            if training is not None:
                turn["training"] = training
            if training_detail is not None:
                turn["training_detail"] = training_detail

            turns.append(turn)

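Net effect of the hunk above: training and training_detail are only attached to a turn when the message actually carries them, instead of always being materialized (possibly as None). A small illustrative comparison using plain dict keys rather than the prompter's configured field names:

message = {"role": "user", "content": "hi"}  # no per-message training flags

# old behaviour: key always present, possibly None
old_turn = {**message, "training": message.get("training")}

# new behaviour: key added only when a value is set
new_turn = dict(message)
training = message.get("training")
if training is not None:
    new_turn["training"] = training

assert "training" in old_turn and "training" not in new_turn
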
@@ -859,15 +860,6 @@ class MistralStrategy(ChatTemplateStrategy):
        # TODO: address this in the future with mistral-specific checks
        # self._validate_eot_and_eos_tokens()

    @property
    def supports_multiprocessing(self) -> bool:
        """
        Whether this tokenizing strategy supports multiprocessing.
        mistral_common tokenizers cannot be pickled for multiprocessing.
        """

        return False

    def find_first_eot_token(self, input_ids, start_idx):
        """Find the first EOT token in the input_ids starting from start_idx."""
        # mistral-common tokenizer does not support eot_tokens
@@ -70,14 +70,6 @@ class PromptTokenizingStrategy(abc.ABC):
    def supports_batched(self):
        return False

    @property
    def supports_multiprocessing(self):
        """
        Whether this tokenizing strategy supports multiprocessing.
        Should return False if the tokenizer has unpicklable objects.
        """
        return True

    def _tokenize(
        self, prompt: str, add_eos_token: bool = True, strip_bos_token: bool = False
    ) -> BatchEncoding:

File diff suppressed because one or more lines are too long
20
src/axolotl/utils/chat_templates/__init__.py
Normal file
20
src/axolotl/utils/chat_templates/__init__.py
Normal file
@@ -0,0 +1,20 @@
"""
This module provides functionality for selecting chat templates based on user choices.
These templates are used for formatting messages in a conversation.
"""

from .base import (
    _CHAT_TEMPLATES,
    extract_chat_template_args,
    get_chat_template,
    get_chat_template_from_config,
    register_chat_template,
)

__all__ = [
    "get_chat_template",
    "extract_chat_template_args",
    "get_chat_template_from_config",
    "register_chat_template",
    "_CHAT_TEMPLATES",
]
125
src/axolotl/utils/chat_templates/base.py
Normal file
125
src/axolotl/utils/chat_templates/base.py
Normal file
@@ -0,0 +1,125 @@
"""
utility functions for chat templates
"""

import os
from typing import TYPE_CHECKING, Any, Dict, Optional

from axolotl.utils.logging import get_logger

if TYPE_CHECKING:
    from transformers import PreTrainedTokenizerBase

LOG = get_logger("axolotl.utils.chat_templates")

_JINJA_TEMPLATE_CHOICE = "jinja"
_DEFAULT_TEMPLATE_CHOICE = "tokenizer_default"
_DEFAULT_FALLBACK_CHATML_TEMPLATE_CHOICE_PREFIX = "tokenizer_default_fallback_"

TEMPLATE_DIR = os.path.join(os.path.dirname(__file__), "templates")
_CHAT_TEMPLATES: dict[str, str] = {}
for filename in [f for f in os.listdir(TEMPLATE_DIR) if f.endswith(".jinja")]:
    with open(os.path.join(TEMPLATE_DIR, filename), "r", encoding="utf-8") as f:
        _CHAT_TEMPLATES[filename[:-6]] = f.read()


def get_chat_template(
    user_choice: str,
    jinja_template: str | None = None,
    tokenizer: Optional["PreTrainedTokenizerBase"] = None,
) -> str:
    """
    Finds the correct chat_template based on the user's choice, jinja_template, and tokenizer.

    Args:
        user_choice (str): The user's choice of template.
        jinja_template (str, optional): The jinja template string or Path to a valid jinja template file. Defaults to None.
        tokenizer (PreTrainedTokenizerBase, optional): The tokenizer. Defaults to None.

    Returns:
        str: The chosen template string.

    Raises:
        ValueError: If the user_choice is not found in the templates.
    """
    if user_choice == _JINJA_TEMPLATE_CHOICE:
        if not jinja_template:
            raise ValueError(
                f"`jinja_template` cannot be None when `chat_template` choice is {_JINJA_TEMPLATE_CHOICE}"
            )
        if os.path.exists(jinja_template) and os.path.isfile(jinja_template):
            with open(jinja_template, "r", encoding="utf-8") as file:
                jinja_template = file.read()
        return jinja_template

    if user_choice == _DEFAULT_TEMPLATE_CHOICE:
        if not tokenizer:
            raise ValueError(
                f"`tokenizer` cannot be None when chat_template choice is {_DEFAULT_TEMPLATE_CHOICE}"
            )
        if not tokenizer.chat_template:
            raise ValueError(
                f"`chat_template choice is {_DEFAULT_TEMPLATE_CHOICE} but tokenizer's chat_template is null. "
                f"Please add a chat_template in tokenizer config"
            )
        return tokenizer.chat_template  # type: ignore

    if user_choice.startswith(_DEFAULT_FALLBACK_CHATML_TEMPLATE_CHOICE_PREFIX):
        if not tokenizer:
            raise ValueError(
                f"`tokenizer` cannot be None when chat_template choice starts with {_DEFAULT_FALLBACK_CHATML_TEMPLATE_CHOICE_PREFIX}"
            )
        if tokenizer.chat_template:
            return tokenizer.chat_template  # type: ignore

        user_choice = user_choice[
            len(_DEFAULT_FALLBACK_CHATML_TEMPLATE_CHOICE_PREFIX) :
        ]
        LOG.warning(
            f"No chat template found on tokenizer, falling back to {user_choice}. It is recommended to set --train_on_inputs to True for the model to learn this chat template."
        )

    if user_choice in _CHAT_TEMPLATES:
        return _CHAT_TEMPLATES[user_choice]

    raise ValueError(f"Template '{user_choice}' not found.")


def extract_chat_template_args(cfg, ds_cfg: Dict[str, Any] | None = None):
    if ds_cfg and ds_cfg.get("chat_template"):
        chat_template_choice = ds_cfg.get("chat_template") or _DEFAULT_TEMPLATE_CHOICE
        chat_template_jinja = ds_cfg.get("chat_template_jinja")
    else:
        chat_template_choice = cfg.get("chat_template") or _DEFAULT_TEMPLATE_CHOICE
        chat_template_jinja = cfg.get("chat_template_jinja")
    return chat_template_choice, chat_template_jinja


def get_chat_template_from_config(
    cfg,
    ds_cfg: Dict[str, Any] | None = None,
    tokenizer: Optional["PreTrainedTokenizerBase"] = None,
) -> str:
    chat_template_choice, chat_template_jinja = extract_chat_template_args(
        cfg=cfg, ds_cfg=ds_cfg
    )
    return get_chat_template(
        user_choice=chat_template_choice,
        jinja_template=chat_template_jinja,
        tokenizer=tokenizer,
    )


def register_chat_template(template_name: str, chat_template: str):
    """
    Registers chat templates.

    Args:
        template_name (str): The name of the template.
        chat_template (str): The template string.
    """

    if template_name in _CHAT_TEMPLATES:
        raise ValueError(f"Template '{template_name}' already exists.")

    _CHAT_TEMPLATES[template_name] = chat_template
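A short usage sketch for the helpers above; the custom template string and name are illustrative:

from axolotl.utils.chat_templates import get_chat_template, register_chat_template

# Look up a bundled template by name (raises ValueError if unknown).
chatml_template = get_chat_template("chatml")

# Register a new template under a unique name; reusing an existing
# name raises ValueError.
register_chat_template("my_minimal", "{% for m in messages %}{{ m['content'] }}{% endfor %}")
assert "messages" in get_chat_template("my_minimal")
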
8
src/axolotl/utils/chat_templates/templates/alpaca.jinja
Normal file
8
src/axolotl/utils/chat_templates/templates/alpaca.jinja
Normal file
@@ -0,0 +1,8 @@
{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' and loop.first %}{{ message['content'] }}{% elif message['role'] == 'user' %}{{ '### Instruction:
' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '### Response:
' + message['content'] + eos_token }}{% endif %}{% if not loop.last %}{{ '

' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '

### Response:
' }}{% endif %}
1
src/axolotl/utils/chat_templates/templates/aya.jinja
Normal file
1
src/axolotl/utils/chat_templates/templates/aya.jinja
Normal file
@@ -0,0 +1 @@
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}
4
src/axolotl/utils/chat_templates/templates/chatml.jinja
Normal file
4
src/axolotl/utils/chat_templates/templates/chatml.jinja
Normal file
@@ -0,0 +1,4 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}
1
src/axolotl/utils/chat_templates/templates/cohere.jinja
Normal file
1
src/axolotl/utils/chat_templates/templates/cohere.jinja
Normal file
@@ -0,0 +1 @@
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}
210
src/axolotl/utils/chat_templates/templates/command_a.jinja
Normal file
210
src/axolotl/utils/chat_templates/templates/command_a.jinja
Normal file
@@ -0,0 +1,210 @@
|
||||
{{ bos_token }}{% if documents %}
|
||||
{% set tools = [] %}
|
||||
{%- macro document_turn(documents) -%}
|
||||
{# format documents into chat turn #}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[
|
||||
{"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{
|
||||
"tool_call_id": "0",
|
||||
"results": {
|
||||
{% for doc in documents %}
|
||||
"{{ loop.index0 }}": {{doc|tojson}}{% if not loop.last %},
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %}
|
||||
{%- macro tool_call_id_to_int(messages, tool_call_id) %}
|
||||
{%- set counter = namespace(value=0) %}
|
||||
{%- set tool_call_id_seen = namespace(value=false) %}
|
||||
{%- for msg in messages %}
|
||||
{%- if msg.tool_calls %}
|
||||
{%- for tool_call in msg.tool_calls %}
|
||||
{%- if tool_call.id == tool_call_id and not tool_call_id_seen.value -%}
|
||||
{{ counter.value }}
|
||||
{%- set tool_call_id_seen.value = true %}
|
||||
{%- endif %}
|
||||
{%- set counter.value = counter.value + 1 %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endmacro %}
|
||||
{%- macro format_tool_message(messages, tool_msg) -%}
|
||||
{# format tool message #}
|
||||
{
|
||||
"tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}",
|
||||
"results": {
|
||||
"0": {{ tool_msg.content|tojson }}
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
{%- endmacro -%}
|
||||
{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %}
|
||||
{%- set tool_idx = namespace(value=0) %}
|
||||
{%- set tool_ids_seen = namespace(value=[]) %}
|
||||
{%- set sent_documents = namespace(value=false) %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
|
||||
You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
|
||||
|
||||
Your information cutoff date is June 2024.
|
||||
|
||||
You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages.
|
||||
{% if tools or documents %}
|
||||
|
||||
You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests.
|
||||
|
||||
## Tool Use
|
||||
Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first.
|
||||
|
||||
0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed.
|
||||
NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools.
|
||||
|
||||
Then carry out your plan by repeatedly executing the following steps.
|
||||
1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields.
|
||||
When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>.
|
||||
2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results.
|
||||
Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id".
|
||||
3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded.
|
||||
NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user.
|
||||
|
||||
You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user.
|
||||
|
||||
4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>.
|
||||
{% if enable_citations %}
|
||||
|
||||
## Grounding
|
||||
Importantly, note that "Reflection" and "Response" above can be grounded.
|
||||
Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "<co>" and "</co>" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "<co>span</co: 0:[1,2],1:[0]>" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1".
|
||||
{% endif %}
|
||||
|
||||
## Available Tools
|
||||
Here is the list of tools that you have available to you.
|
||||
You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it.
|
||||
Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema).
|
||||
|
||||
```json
|
||||
[
|
||||
{% if documents %}
|
||||
{"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}}{%- if tools %},{% endif %}
|
||||
|
||||
{% endif %}
|
||||
{% for tool in tools %}
|
||||
{"name": "{{ tool['function']['name'] }}", "description": "{{tool['function']['description']}}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null}{%- if not loop.last %},{% endif %}
|
||||
|
||||
{% endfor %}
|
||||
]
|
||||
```
|
||||
|
||||
{% endif %}
|
||||
# Default Preamble
|
||||
The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
|
||||
- Your name is Command.
|
||||
- You are a large language model built by Cohere.
|
||||
- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.
|
||||
- If the input is ambiguous, ask clarifying follow-up questions.
|
||||
- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).
|
||||
- Use LaTeX to generate mathematical notation for complex equations.
|
||||
- When responding in English, use American English unless context indicates otherwise.
|
||||
- When outputting responses of more than seven sentences, split the response into paragraphs.
|
||||
- Prefer the active voice.
|
||||
- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references.
|
||||
- Use gender-neutral pronouns for unspecified persons.
|
||||
- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.
|
||||
- Use the third person when asked to write a summary.
|
||||
- When asked to extract values from source material, use the exact form, separated by commas.
|
||||
- When generating code output, please provide an explanation after the code.
|
||||
- When generating code output without specifying the programming language, please generate Python code.
|
||||
- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.
|
||||
{%- if developer_preamble %}
|
||||
|
||||
|
||||
# Developer Preamble
|
||||
The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
|
||||
{{ developer_preamble }}
|
||||
{%- endif -%}
|
||||
<|END_OF_TURN_TOKEN|>
|
||||
{%- for message in messages %}
|
||||
{%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>
|
||||
{%- elif message.role|lower == 'user' %}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
|
||||
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
|
||||
{% for tc in message.tool_calls %}
|
||||
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
|
||||
|
||||
{% set tool_idx.value = tool_idx.value + 1 %}
|
||||
{% endfor %}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|>{% else %}<|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>{% endif %}
|
||||
{% elif message.role|lower == 'tool' and message.tool_call_id not in tool_ids_seen.value %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{{ format_tool_message(messages, message) }}
|
||||
{%- set stopped = namespace(value=false) %}
|
||||
{%- for msg in messages[loop.index0 + 1:] %}
|
||||
{%- if not stopped.value and msg.role|lower == 'tool' %},
|
||||
{{ format_tool_message(messages, msg) }}
|
||||
{%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %}
|
||||
{%- else %}
|
||||
{%- set stopped.value = true %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
|
||||
{%- endif %}
|
||||
{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
|
||||
{%- else -%}
|
||||
{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
|
||||
{% if safety_mode|upper == 'STRICT' -%}
|
||||
You are in strict safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will reject requests to generate content related to violence, hate, misinformation or sex to any amount. You will avoid using profanity. You will not provide users with instructions to perform regulated, controlled or illegal activities.
|
||||
{%- else -%}
|
||||
You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
|
||||
{%- endif %}
|
||||
|
||||
|
||||
Your information cutoff date is June 2024.
|
||||
|
||||
You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages.
|
||||
|
||||
# Default Preamble
|
||||
The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
|
||||
- Your name is Command.
|
||||
- You are a large language model built by Cohere.
|
||||
- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.
|
||||
- If the input is ambiguous, ask clarifying follow-up questions.
|
||||
- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).
|
||||
- Use LaTeX to generate mathematical notation for complex equations.
|
||||
- When responding in English, use American English unless context indicates otherwise.
|
||||
- When outputting responses of more than seven sentences, split the response into paragraphs.
|
||||
- Prefer the active voice.
|
||||
- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references.
|
||||
- Use gender-neutral pronouns for unspecified persons.
|
||||
- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.
|
||||
- Use the third person when asked to write a summary.
|
||||
- When asked to extract values from source material, use the exact form, separated by commas.
|
||||
- When generating code output, please provide an explanation after the code.
|
||||
- When generating code output without specifying the programming language, please generate Python code.
|
||||
- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.
|
||||
{%- if developer_preamble %}
|
||||
|
||||
|
||||
# Developer Preamble
|
||||
The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
|
||||
{{ developer_preamble }}
|
||||
{%- endif -%}
|
||||
<|END_OF_TURN_TOKEN|>
|
||||
{%- for message in messages %}
|
||||
{%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>
|
||||
{%- elif message.role|lower == 'user' %}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>
|
||||
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>
|
||||
{%- endif %}
|
||||
{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{%- if add_generation_prompt -%}<|START_RESPONSE|>{%- endif %}
|
||||
{% endif %}
|
||||
158
src/axolotl/utils/chat_templates/templates/command_a_rag.jinja
Normal file
158
src/axolotl/utils/chat_templates/templates/command_a_rag.jinja
Normal file
@@ -0,0 +1,158 @@
|
||||
{{ bos_token }}{% set tools = [] %}
|
||||
{%- macro document_turn(documents) -%}
|
||||
{# format documents into chat turn #}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[
|
||||
{"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{
|
||||
"tool_call_id": "0",
|
||||
"results": {
|
||||
{% for doc in documents %}
|
||||
"{{ loop.index0 }}": {{doc|tojson}}{% if not loop.last %},
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %}
|
||||
{%- macro tool_call_id_to_int(messages, tool_call_id) %}
|
||||
{%- set counter = namespace(value=0) %}
|
||||
{%- set tool_call_id_seen = namespace(value=false) %}
|
||||
{%- for msg in messages %}
|
||||
{%- if msg.tool_calls %}
|
||||
{%- for tool_call in msg.tool_calls %}
|
||||
{%- if tool_call.id == tool_call_id and not tool_call_id_seen.value -%}
|
||||
{{ counter.value }}
|
||||
{%- set tool_call_id_seen.value = true %}
|
||||
{%- endif %}
|
||||
{%- set counter.value = counter.value + 1 %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endmacro %}
|
||||
{%- macro format_tool_message(messages, tool_msg) -%}
|
||||
{# format tool message #}
|
||||
{
|
||||
"tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}",
|
||||
"results": {
|
||||
"0": {{ tool_msg.content|tojson }}
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
{%- endmacro -%}
|
||||
{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %}
|
||||
{%- set tool_idx = namespace(value=0) %}
|
||||
{%- set tool_ids_seen = namespace(value=[]) %}
|
||||
{%- set sent_documents = namespace(value=false) %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
|
||||
You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
|
||||
|
||||
Your information cutoff date is June 2024.
|
||||
|
||||
You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages.
|
||||
{% if tools or documents %}
|
||||
|
||||
You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests.
|
||||
|
||||
## Tool Use
|
||||
Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first.
|
||||
|
||||
0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed.
|
||||
NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools.
|
||||
|
||||
Then carry out your plan by repeatedly executing the following steps.
|
||||
1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields.
|
||||
When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>.
|
||||
2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results.
|
||||
Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id".
|
||||
3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded.
|
||||
NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user.
|
||||
|
||||
You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user.
|
||||
|
||||
4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>.
|
||||
{% if enable_citations %}
|
||||
|
||||
## Grounding
|
||||
Importantly, note that "Reflection" and "Response" above can be grounded.
|
||||
Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "<co>" and "</co>" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "<co>span</co: 0:[1,2],1:[0]>" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1".
|
||||
{% endif %}
|
||||
|
||||
## Available Tools
|
||||
Here is the list of tools that you have available to you.
|
||||
You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it.
|
||||
Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema).
|
||||
|
||||
```json
|
||||
[
|
||||
{% if documents %}
|
||||
{"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}}{%- if tools %},{% endif %}
|
||||
|
||||
{% endif %}
|
||||
{% for tool in tools %}
|
||||
{"name": "{{ tool['function']['name'] }}", "description": "{{tool['function']['description']}}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null}{%- if not loop.last %},{% endif %}
|
||||
|
||||
{% endfor %}
|
||||
]
|
||||
```
|
||||
|
||||
{% endif %}
|
||||
# Default Preamble
|
||||
The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
|
||||
- Your name is Command.
|
||||
- You are a large language model built by Cohere.
|
||||
- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.
|
||||
- If the input is ambiguous, ask clarifying follow-up questions.
|
||||
- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).
|
||||
- Use LaTeX to generate mathematical notation for complex equations.
|
||||
- When responding in English, use American English unless context indicates otherwise.
|
||||
- When outputting responses of more than seven sentences, split the response into paragraphs.
|
||||
- Prefer the active voice.
|
||||
- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references.
|
||||
- Use gender-neutral pronouns for unspecified persons.
|
||||
- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.
|
||||
- Use the third person when asked to write a summary.
|
||||
- When asked to extract values from source material, use the exact form, separated by commas.
|
||||
- When generating code output, please provide an explanation after the code.
|
||||
- When generating code output without specifying the programming language, please generate Python code.
|
||||
- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.
|
||||
{%- if developer_preamble %}
|
||||
|
||||
|
||||
# Developer Preamble
|
||||
The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
|
||||
{{ developer_preamble }}
|
||||
{%- endif -%}
|
||||
<|END_OF_TURN_TOKEN|>
|
||||
{%- for message in messages %}
|
||||
{%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>
|
||||
{%- elif message.role|lower == 'user' %}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
|
||||
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
|
||||
{% for tc in message.tool_calls %}
|
||||
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
|
||||
|
||||
{% set tool_idx.value = tool_idx.value + 1 %}
|
||||
{% endfor %}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|>{% else %}<|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>{% endif %}
|
||||
{% elif message.role|lower == 'tool' and message.tool_call_id not in tool_ids_seen.value %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{{ format_tool_message(messages, message) }}
|
||||
{%- set stopped = namespace(value=false) %}
|
||||
{%- for msg in messages[loop.index0 + 1:] %}
|
||||
{%- if not stopped.value and msg.role|lower == 'tool' %},
|
||||
{{ format_tool_message(messages, msg) }}
|
||||
{%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %}
|
||||
{%- else %}
|
||||
{%- set stopped.value = true %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
|
||||
{%- endif %}
|
||||
{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
|
||||
@@ -0,0 +1,157 @@
|
||||
{{ bos_token }}{%- macro document_turn(documents) -%}
|
||||
{# format documents into chat turn #}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[
|
||||
{"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{
|
||||
"tool_call_id": "0",
|
||||
"results": {
|
||||
{% for doc in documents %}
|
||||
"{{ loop.index0 }}": {{doc|tojson}}{% if not loop.last %},
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %}
|
||||
{%- macro tool_call_id_to_int(messages, tool_call_id) %}
|
||||
{%- set counter = namespace(value=0) %}
|
||||
{%- set tool_call_id_seen = namespace(value=false) %}
|
||||
{%- for msg in messages %}
|
||||
{%- if msg.tool_calls %}
|
||||
{%- for tool_call in msg.tool_calls %}
|
||||
{%- if tool_call.id == tool_call_id and not tool_call_id_seen.value -%}
|
||||
{{ counter.value }}
|
||||
{%- set tool_call_id_seen.value = true %}
|
||||
{%- endif %}
|
||||
{%- set counter.value = counter.value + 1 %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endmacro %}
|
||||
{%- macro format_tool_message(messages, tool_msg) -%}
|
||||
{# format tool message #}
|
||||
{
|
||||
"tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}",
|
||||
"results": {
|
||||
"0": {{ tool_msg.content|tojson }}
|
||||
},
|
||||
"is_error": null
|
||||
}
|
||||
{%- endmacro -%}
|
||||
{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %}
|
||||
{%- set tool_idx = namespace(value=0) %}
|
||||
{%- set tool_ids_seen = namespace(value=[]) %}
|
||||
{%- set sent_documents = namespace(value=false) %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
|
||||
You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
|
||||
|
||||
Your information cutoff date is June 2024.
|
||||
|
||||
You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages.
|
||||
{% if tools or documents %}
|
||||
|
||||
You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests.
|
||||
|
||||
## Tool Use
|
||||
Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first.
|
||||
|
||||
0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed.
|
||||
NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools.
|
||||
|
||||
Then carry out your plan by repeatedly executing the following steps.
|
||||
1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields.
|
||||
When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>.
|
||||
2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results.
|
||||
Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id".
|
||||
3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>.
|
||||
You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded.
|
||||
NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user.
|
||||
|
||||
You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user.
|
||||
|
||||
4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>.
|
||||
{% if enable_citations %}
|
||||
|
||||
## Grounding
|
||||
Importantly, note that "Reflection" and "Response" above can be grounded.
|
||||
Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "<co>" and "</co>" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "<co>span</co: 0:[1,2],1:[0]>" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1".
|
||||
{% endif %}
|
||||
|
||||
## Available Tools
|
||||
Here is the list of tools that you have available to you.
|
||||
You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it.
|
||||
Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema).
|
||||
|
||||
```json
|
||||
[
|
||||
{% if documents %}
|
||||
{"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}}{%- if tools %},{% endif %}
|
||||
|
||||
{% endif %}
|
||||
{% for tool in tools %}
|
||||
{"name": "{{ tool['function']['name'] }}", "description": "{{tool['function']['description']}}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null}{%- if not loop.last %},{% endif %}
|
||||
|
||||
{% endfor %}
|
||||
]
|
||||
```
|
||||
|
||||
{% endif %}
|
||||
# Default Preamble
|
||||
The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
|
||||
- Your name is Command.
|
||||
- You are a large language model built by Cohere.
|
||||
- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.
|
||||
- If the input is ambiguous, ask clarifying follow-up questions.
|
||||
- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).
|
||||
- Use LaTeX to generate mathematical notation for complex equations.
|
||||
- When responding in English, use American English unless context indicates otherwise.
|
||||
- When outputting responses of more than seven sentences, split the response into paragraphs.
|
||||
- Prefer the active voice.
|
||||
- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references.
|
||||
- Use gender-neutral pronouns for unspecified persons.
|
||||
- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.
|
||||
- Use the third person when asked to write a summary.
|
||||
- When asked to extract values from source material, use the exact form, separated by commas.
|
||||
- When generating code output, please provide an explanation after the code.
|
||||
- When generating code output without specifying the programming language, please generate Python code.
|
||||
- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.
|
||||
{%- if developer_preamble %}
|
||||
|
||||
|
||||
# Developer Preamble
|
||||
The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
|
||||
{{ developer_preamble }}
|
||||
{%- endif -%}
|
||||
<|END_OF_TURN_TOKEN|>
|
||||
{%- for message in messages %}
|
||||
{%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>
|
||||
{%- elif message.role|lower == 'user' %}
|
||||
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
|
||||
{%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
|
||||
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
|
||||
{% for tc in message.tool_calls %}
|
||||
{"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
|
||||
|
||||
{% set tool_idx.value = tool_idx.value + 1 %}
|
||||
{% endfor %}
|
||||
]<|END_ACTION|><|END_OF_TURN_TOKEN|>{% else %}<|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>{% endif %}
|
||||
{% elif message.role|lower == 'tool' and message.tool_call_id not in tool_ids_seen.value %}
|
||||
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[
|
||||
{{ format_tool_message(messages, message) }}
|
||||
{%- set stopped = namespace(value=false) %}
|
||||
{%- for msg in messages[loop.index0 + 1:] %}
|
||||
{%- if not stopped.value and msg.role|lower == 'tool' %},
|
||||
{{ format_tool_message(messages, msg) }}
|
||||
{%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %}
|
||||
{%- else %}
|
||||
{%- set stopped.value = true %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
|
||||
{%- endif %}
|
||||
{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
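For a quick smoke test of the tool-use flow encoded by the template above, a minimal sketch using transformers' `apply_chat_template`; the repo id and message payloads are illustrative assumptions, not taken from this diff.

```python
# Sketch only: render a tool-use conversation through a Command-style chat template.
# The repo id and the message contents below are assumptions for illustration.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r7b-12-2024")

messages = [
    {"role": "user", "content": "What's the weather in Toronto?"},
    {
        "role": "assistant",
        "tool_plan": "I will look up the current weather for Toronto.",
        "tool_calls": [
            {"function": {"name": "get_weather", "arguments": {"city": "Toronto"}}}
        ],
    },
    {"role": "tool", "tool_call_id": "0", "content": '[{"temperature_c": 21}]'},
]

text = tokenizer.apply_chat_template(messages, tokenize=False)
# The rendering should contain <|START_ACTION|>...<|END_ACTION|> for the tool call
# and <|START_TOOL_RESULT|>...<|END_TOOL_RESULT|> for the tool output.
print(text)
```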
@@ -0,0 +1,3 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '
' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}
@@ -0,0 +1 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
4
src/axolotl/utils/chat_templates/templates/exaone.jinja
Normal file
@@ -0,0 +1,4 @@
{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]
' }}{% endif %}{{ '[|' + message['role'] + '|]' + message['content'] }}{% if message['role'] == 'user' %}{{ '
' }}{% else %}{{ '[|endofturn|]
' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[|assistant|]' }}{% endif %}
17
src/axolotl/utils/chat_templates/templates/falcon_h1.jinja
Normal file
@@ -0,0 +1,17 @@
'{{bos_token}}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{{- "You are a function calling AI model. You are provided with function signature within <tools> </tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions.\n<tools>\n" }}
{%- for tool in tools %}[{{- tool | tojson }}]{%- endfor %}
{{- "\n</tools>\nFor each function call, return a json object with function name and arguments within <tool_call> </tool_call> tags with the following schema:\n<tool_call>\n{'arguments': <args-dict>, 'name': <function-name>}\n</tool_call>\n" }}
{%- else %}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}{% for message in messages %}{%- if message.role != 'system' %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{%- endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}'
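These template files are plain Jinja2, so they can also be exercised outside a tokenizer; a minimal sketch that renders the falcon_h1 file directly (run from the repo root; the sample messages and the `bos_token` value are assumptions):

```python
# Sketch: render one of the bundled chat template files with jinja2 directly.
# The messages and bos_token below are illustrative assumptions.
from jinja2 import Template

with open("src/axolotl/utils/chat_templates/templates/falcon_h1.jinja") as f:
    template = Template(f.read())

rendered = template.render(
    bos_token="<|startoftext|>",  # assumed special token
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hi there!"},
    ],
    tools=None,
    add_generation_prompt=True,
)
print(rendered)
```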
4
src/axolotl/utils/chat_templates/templates/gemma.jinja
Normal file
@@ -0,0 +1,4 @@
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '
' + message['content'] | trim + '<end_of_turn>
' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model
'}}{% endif %}
47
src/axolotl/utils/chat_templates/templates/gemma3.jinja
Normal file
@@ -0,0 +1,47 @@
{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
{%- if messages[0]['content'] is string -%}
{%- set first_user_prefix = messages[0]['content'] + '

' -%}
{%- else -%}
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '

' -%}
{%- endif -%}
{%- set loop_messages = messages[1:] -%}
{%- else -%}
{%- set first_user_prefix = "" -%}
{%- set loop_messages = messages -%}
{%- endif -%}
{%- for message in loop_messages -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
{%- endif -%}
{%- if (message['role'] == 'assistant') -%}
{%- set role = "model" -%}
{%- else -%}
{%- set role = message['role'] -%}
{%- endif -%}
{{ '<start_of_turn>' + role + '
' + (first_user_prefix if loop.first else "") }}
{%- if message['content'] is string -%}
{{ message['content'] | trim }}
{%- elif message['content'] is iterable -%}
{%- for item in message['content'] -%}
{%- if item['type'] == 'image' -%}
{{ '<start_of_image>' }}
{%- elif item['type'] == 'text' -%}
{{ item['text'] | trim }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ raise_exception("Invalid content type") }}
{%- endif -%}
{{ '<end_of_turn>
' }}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{'<start_of_turn>model
'}}
{%- endif -%}
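The gemma3 template accepts either a plain string or a list of typed parts per message; a sketch of the list form it handles above (the contents are illustrative):

```python
# Sketch: gemma3-style multimodal content. Each "image" part renders as
# <start_of_image> and each "text" part as its trimmed text (see the loop above).
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is shown in this picture?"},
        ],
    },
]
# Rendering these messages through the template yields roughly:
#   <start_of_turn>user
#   <start_of_image>What is shown in this picture?<end_of_turn>
```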
255
src/axolotl/utils/chat_templates/templates/jamba.jinja
Normal file
@@ -0,0 +1,255 @@
|
||||
{# Variables #}
|
||||
{% set ns = namespace(message_count=0, is_last_checked_defined=False) %}
|
||||
{##}
|
||||
{% set bom_str = bom_str or "<|bom|>" %}
|
||||
{% set eom_str = eom_str or "<|eom|>" %}
|
||||
{% set default_system_message = "" %}
|
||||
{##}
|
||||
{% set documents_prefix = "<documents>" %}
|
||||
{% set documents_suffix = "</documents>" %}
|
||||
{% set tool_definitions_prefix = "<tool_definitions>" %}
|
||||
{% set tool_definitions_suffix = "</tool_definitions>" %}
|
||||
{% set active_modes_prefix = "<active_output_modes>" %}
|
||||
{% set active_modes_suffix = "</active_output_modes>" %}
|
||||
{##}
|
||||
{% set tool_calls_prefix = "<tool_calls>" %}
|
||||
{% set tool_calls_suffix = "</tool_calls>" %}
|
||||
{% set citations_prefix = "<citations>" %}
|
||||
{% set citations_suffix = "</citations>" %}
|
||||
{##}
|
||||
{% if add_generation_prompt is not defined %}
|
||||
{% set add_generation_prompt = True %}
|
||||
{% endif %}
|
||||
{% set role_to_predict = role_to_predict or "assistant" %}
|
||||
{% if messages|length > 0 and messages[0].role == "system" %}
|
||||
{% set system_message = messages[0].content %}
|
||||
{% set loop_messages = messages[1:] %}
|
||||
{% else %}
|
||||
{% set system_message = default_system_message %}
|
||||
{% set loop_messages = messages %}
|
||||
{% endif %}
|
||||
{##}
|
||||
{##}
|
||||
{# Macros #}
|
||||
{% macro handle_tool_definitions(tools) %}
|
||||
{{- tool_definitions_prefix -}}
|
||||
{{- "\n# Tools" -}}
|
||||
{{- "\n\n## Functions" -}}
|
||||
{% for tool in tools %}
|
||||
{% set _ = is_param_set(tool, field="type") %}
|
||||
{% set is_tool_type_set = ns.is_last_checked_defined %}
|
||||
{% if is_tool_type_set %}
|
||||
{% if tool.type == "function" %}
|
||||
{% set tool = tool.function %}
|
||||
{% else %}
|
||||
{{ raise_exception("Currently, the only supported tool type is `function`") }}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{{- "\n\n" + (tool|tojson(indent=2)) -}}
|
||||
{% endfor %}
|
||||
{{- "\n" + tool_definitions_suffix -}}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_first_system_message(system_message, tools) %}
|
||||
{{- bom_str + handle_role("system") -}}
|
||||
{% set _ = is_param_set(system_message) %}
|
||||
{% set is_system_message_set = ns.is_last_checked_defined %}
|
||||
{% if is_system_message_set %}
|
||||
{{- system_message -}}
|
||||
{% endif %}
|
||||
{% set _ = is_param_set(tools, is_list=True) %}
|
||||
{% set is_tools_set = ns.is_last_checked_defined %}
|
||||
{% if is_tools_set %}
|
||||
{% if system_message %}
|
||||
{{- "\n\n" -}}
|
||||
{% endif %}
|
||||
{{- handle_tool_definitions(tools) -}}
|
||||
{% endif %}
|
||||
{% set ns.message_count = ns.message_count + 1 %}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_tool_calls(tool_calls) %}
|
||||
{{- tool_calls_prefix + "[\n" -}}
|
||||
{% for tool_call in tool_calls %}
|
||||
{% set _ = is_param_set(tool_call, field="function") %}
|
||||
{% set is_tool_call_function_set = ns.is_last_checked_defined %}
|
||||
{% if is_tool_call_function_set %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{% set arguments = tool_call.arguments %}
|
||||
{% if arguments is not string %}
|
||||
{%- set arguments = arguments|tojson -%}
|
||||
{%- endif %}
|
||||
{{ "{\"name\": \"" + tool_call.name + "\", \"arguments\": " + arguments + "}" -}}
|
||||
{% if not loop.last %}
|
||||
{{- "," }}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{{- "\n]" + tool_calls_suffix -}}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_documents(documents) %}
|
||||
{{- documents_prefix -}}
|
||||
{{- "\n# Documents" -}}
|
||||
{{- "\n\nYou can use the following documents for reference:" -}}
|
||||
{% for doc in documents %}
|
||||
{{- "\n\n## Document ID: " + loop.index0|string -}}
|
||||
{% set _ = is_param_set(doc, field="title") %}
|
||||
{% set is_doc_title_set = ns.is_last_checked_defined %}
|
||||
{% if is_doc_title_set %}
|
||||
{{- "\nTitle: " + doc.title -}}
|
||||
{% endif %}
|
||||
{% for key, value in doc.items() %}
|
||||
{% if key not in ["title", "text"] %}
|
||||
{{- "\n" + key|title + ": " + value|string -}}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{{- "\nText: " + doc.text -}}
|
||||
{% endfor %}
|
||||
{{- "\n" + documents_suffix -}}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_knobs(knobs) %}
|
||||
{{- active_modes_prefix -}}
|
||||
{{- "\n# Active Modes" -}}
|
||||
{{ "\n\nThe following modes configure the format or style of your responses. You should adhere to all currently" -}}
|
||||
{{ " active modes simultaneously." -}}
|
||||
{% if knobs.citation_mode == "fast" %}
|
||||
{{- "\n\n## Citation Mode" -}}
|
||||
{{- "\n\nProvide a list of references only for the documents you base your response on. Format your response" -}}
|
||||
{{ " with the original answer followed by a citation section. Use this template:" -}}
|
||||
{{ " `{answer}" + citations_prefix + "DOCUMENT_IDS" + citations_suffix + "`, where DOCUMENT_IDS are the relevant document numbers" -}}
|
||||
{{ " (e.g. [2, 5, 9]), or [] if the answer cannot be supported by the provided documents." -}}
|
||||
{% endif %}
|
||||
{% if knobs.response_format == "json_object" %}
|
||||
{{- "\n\n## JSON Mode" -}}
|
||||
{{ "\n\nProvide your response in JSON format. Adhere strictly to any schema given by the user." -}}
|
||||
{{ " If an appropriate JSON format exists, use it without modification." -}}
|
||||
{% endif %}
|
||||
{{- "\n" + active_modes_suffix -}}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro get_last_user_index(messages) %}
|
||||
{% set ns.last_user_index = 0 %}
|
||||
{% for message in messages %}
|
||||
{% if message.role == 'user' %}
|
||||
{% set ns.last_user_index = loop.index0 %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{{- ns.last_user_index -}}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_last_system_message(documents, knobs, use_documents, use_knobs) %}
|
||||
{{- bom_str + handle_role("system") -}}
|
||||
{% set macros_to_call = [] %}
|
||||
{% set params_for_macros = [] %}
|
||||
{% if use_documents %}
|
||||
{% set macros_to_call = macros_to_call + [handle_documents] %}
|
||||
{% set params_for_macros = params_for_macros + [[documents]] %}
|
||||
{% endif %}
|
||||
{% if use_knobs %}
|
||||
{% set macros_to_call = macros_to_call + [handle_knobs] %}
|
||||
{% set params_for_macros = params_for_macros + [[knobs]] %}
|
||||
{% endif %}
|
||||
{% for i in range(macros_to_call|length) %}
|
||||
{% if i > 0 %}
|
||||
{{- "\n\n" -}}
|
||||
{% endif %}
|
||||
{{- macros_to_call[i](*params_for_macros[i]) -}}
|
||||
{% endfor %}
|
||||
{% set ns.message_count = ns.message_count + 1 %}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro handle_role(role, add_space=True) %}
|
||||
{{- "<|" + role + "|>" -}}
|
||||
{% if add_space %}
|
||||
{{- " " -}}
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{% macro is_param_set(param, field=none, is_list=False) %}
|
||||
{% if field is not none %}
|
||||
{% if field in param %}
|
||||
{% set param = param[field] %}
|
||||
{% else %}
|
||||
{% set param = none %}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% set is_defined = param is defined and param is not none %}
|
||||
{% if is_list %}
|
||||
{% set ns.is_last_checked_defined = is_defined and param|length > 0 %}
|
||||
{% else %}
|
||||
{% set ns.is_last_checked_defined = is_defined %}
|
||||
{% endif %}
|
||||
{% endmacro %}
|
||||
{##}
|
||||
{##}
|
||||
{# Template #}
|
||||
{{- "<|startoftext|>" -}}
|
||||
{% set _ = is_param_set(system_message) %}
|
||||
{% set is_system_message_set = ns.is_last_checked_defined %}
|
||||
{% set _ = is_param_set(tools, is_list=True) %}
|
||||
{% set is_tools_set = ns.is_last_checked_defined %}
|
||||
{% set has_system_message = (is_system_message_set or is_tools_set) %}
|
||||
{% if has_system_message %}
|
||||
{{- handle_first_system_message(system_message, tools) -}}
|
||||
{% endif %}
|
||||
{% set last_user_index = get_last_user_index(loop_messages)|int %}
|
||||
{% for message in loop_messages %}
|
||||
{% if loop.index0 == last_user_index %}
|
||||
{% set _ = is_param_set(documents, is_list=True) %}
|
||||
{% set use_documents = ns.is_last_checked_defined %}
|
||||
{% set _ = is_param_set(knobs) %}
|
||||
{% set use_knobs = ns.is_last_checked_defined and knobs.is_set %}
|
||||
{% set add_last_system_message = use_documents or use_knobs %}
|
||||
{% if add_last_system_message %}
|
||||
{% if ns.message_count > 0 %}
|
||||
{{- eom_str -}}
|
||||
{% endif %}
|
||||
{{- handle_last_system_message(documents, knobs, use_documents, use_knobs) -}}
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% set role = message.role %}
|
||||
{% set _ = is_param_set(message, field="name") %}
|
||||
{% set is_message_name_set = ns.is_last_checked_defined %}
|
||||
{% if is_message_name_set %}
|
||||
{% set message_prefix = handle_role(role) + "(" + message.name + ")" %}
|
||||
{% else %}
|
||||
{% set message_prefix = handle_role(role) %}
|
||||
{% endif %}
|
||||
{% set content = (message.content or "") %}
|
||||
{% if content is not string %}
|
||||
{% set content = content|tojson %}
|
||||
{% endif %}
|
||||
{% if ns.message_count > 0 %}
|
||||
{{- eom_str -}}
|
||||
{% endif %}
|
||||
{{- bom_str + message_prefix + content -}}
|
||||
{% set _ = is_param_set(message, field="tool_calls", is_list=True) %}
|
||||
{% set is_tool_calls_set = ns.is_last_checked_defined %}
|
||||
{% if role == "assistant" and is_tool_calls_set %}
|
||||
{{- handle_tool_calls(message.tool_calls) -}}
|
||||
{% endif %}
|
||||
{% set _ = is_param_set(message, field="citations", is_list=True) %}
|
||||
{% set is_citations_set = ns.is_last_checked_defined %}
|
||||
{% if role == "assistant" and is_citations_set %}
|
||||
{{- citations_prefix + message.citations|map(attribute="document_id")|list|string + citations_suffix -}}
|
||||
{% endif %}
|
||||
{% set ns.message_count = ns.message_count + 1 %}
|
||||
{% endfor %}
|
||||
{% if add_generation_prompt %}
|
||||
{% if ns.message_count > 0 %}
|
||||
{{- eom_str -}}
|
||||
{% endif %}
|
||||
{{- bom_str + handle_role(role_to_predict, add_space=False) -}}
|
||||
{% set _ = is_param_set(generation_preamble) %}
|
||||
{% set is_generation_preamble_set = ns.is_last_checked_defined %}
|
||||
{% if is_generation_preamble_set and generation_preamble.strip() != "" %}
|
||||
{{- " " + generation_preamble -}}
|
||||
{% endif %}
|
||||
{% set ns.message_count = ns.message_count + 1 %}
|
||||
{% else %}
|
||||
{% if ns.message_count > 0 %}
|
||||
{{- eom_str -}}
|
||||
{% endif %}
|
||||
{% endif %}
5
src/axolotl/utils/chat_templates/templates/llama3.jinja
Normal file
@@ -0,0 +1,5 @@
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>

'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>

' }}{% endif %}
122
src/axolotl/utils/chat_templates/templates/llama3_2_vision.jinja
Normal file
@@ -0,0 +1,122 @@
|
||||
{{- bos_token }}
|
||||
{%- if custom_tools is defined %}
|
||||
{%- set tools = custom_tools %}
|
||||
{%- endif %}
|
||||
{%- if not tools_in_user_message is defined %}
|
||||
{%- set tools_in_user_message = true %}
|
||||
{%- endif %}
|
||||
{%- if not date_string is defined %}
|
||||
{%- if strftime_now is defined %}
|
||||
{%- set date_string = strftime_now("%d %b %Y") %}
|
||||
{%- else %}
|
||||
{%- set date_string = "26 Jul 2024" %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if not tools is defined %}
|
||||
{%- set tools = none %}
|
||||
{%- endif %}
|
||||
|
||||
{#- This block extracts the system message, so we can slot it into the right place. #}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- set system_message = messages[0]['content']|trim %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = "" %}
|
||||
{%- endif %}
|
||||
|
||||
{#- Find out if there are any images #}
|
||||
{% set image_ns = namespace(has_images=false) %}
|
||||
{%- for message in messages %}
|
||||
{%- for content in message['content'] %}
|
||||
{%- if content['type'] == 'image' %}
|
||||
{%- set image_ns.has_images = true %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endfor %}
|
||||
|
||||
{#- Error out if there are images and system message #}
|
||||
{%- if image_ns.has_images and not system_message == "" %}
|
||||
{{- raise_exception("Prompting with images is incompatible with system messages.") }}
|
||||
{%- endif %}
|
||||
|
||||
{#- System message if there are no images #}
|
||||
{%- if not image_ns.has_images %}
|
||||
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
|
||||
{%- if tools is not none %}
|
||||
{{- "Environment: ipython\n" }}
|
||||
{%- endif %}
|
||||
{{- "Cutting Knowledge Date: December 2023\n" }}
|
||||
{{- "Today Date: " + date_string + "\n\n" }}
|
||||
{%- if tools is not none and not tools_in_user_message %}
|
||||
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- system_message }}
|
||||
{{- "<|eot_id|>" }}
|
||||
{%- endif %}
|
||||
|
||||
{#- Custom tools are passed in a user message with some extra guidance #}
|
||||
{%- if tools_in_user_message and not tools is none %}
|
||||
{#- Extract the first user message so we can plug it in here #}
|
||||
{%- if messages | length != 0 %}
|
||||
{%- set first_user_message = messages[0]['content']|trim %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
|
||||
{%- endif %}
|
||||
{{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
|
||||
{{- "Given the following functions, please respond with a JSON for a function call " }}
|
||||
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{{- first_user_message + "<|eot_id|>"}}
|
||||
{%- endif %}
|
||||
|
||||
{%- for message in messages %}
|
||||
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
|
||||
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] }}
|
||||
{%- else %}
|
||||
{%- for content in message['content'] %}
|
||||
{%- if content['type'] == 'image' %}
|
||||
{{- '<|image|>' }}
|
||||
{%- elif content['type'] == 'text' %}
|
||||
{{- content['text'] }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|eot_id|>' }}
|
||||
{%- elif 'tool_calls' in message %}
|
||||
{%- if not message.tool_calls|length == 1 %}
|
||||
{{- raise_exception("This model only supports single tool-calls at once!") }}
|
||||
{%- endif %}
|
||||
{%- set tool_call = message.tool_calls[0].function %}
|
||||
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
|
||||
{{- '{"name": "' + tool_call.name + '", ' }}
|
||||
{{- '"parameters": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- "}" }}
|
||||
{{- "<|eot_id|>" }}
|
||||
{%- elif message.role == "tool" or message.role == "ipython" %}
|
||||
{{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
|
||||
{%- if message.content is mapping or message.content is iterable %}
|
||||
{{- message.content | tojson }}
|
||||
{%- else %}
|
||||
{{- message.content }}
|
||||
{%- endif %}
|
||||
{{- "<|eot_id|>" }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
|
||||
{%- endif %}
123
src/axolotl/utils/chat_templates/templates/llama4.jinja
Normal file
@@ -0,0 +1,123 @@
|
||||
{{- bos_token }}
|
||||
{%- if custom_tools is defined %}
|
||||
{%- set tools = custom_tools %}
|
||||
{%- endif %}
|
||||
{%- if not tools_in_user_message is defined %}
|
||||
{%- set tools_in_user_message = true %}
|
||||
{%- endif %}
|
||||
{%- if not date_string is defined %}
|
||||
{%- if strftime_now is defined %}
|
||||
{%- set date_string = strftime_now("%d %b %Y") %}
|
||||
{%- else %}
|
||||
{%- set date_string = "26 Jul 2024" %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if not tools is defined %}
|
||||
{%- set tools = none %}
|
||||
{%- endif %}
|
||||
|
||||
{#- This block extracts the system message, so we can slot it into the right place. #}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{%- set system_message = messages[0]['content']|trim %}
|
||||
{%- else %}
|
||||
{#- FIXME: The processor requires an array, always. #}
|
||||
{%- set system_message = messages[0]['content'][0]['text']|trim %}
|
||||
{%- endif %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- set user_supplied_system_message = true %}
|
||||
{%- else %}
|
||||
{%- set system_message = "" %}
|
||||
{%- set user_supplied_system_message = false %}
|
||||
{%- endif %}
|
||||
|
||||
{#- System message if the user supplied one #}
|
||||
{%- if user_supplied_system_message %}
|
||||
{{- "<|header_start|>system<|header_end|>\n\n" }}
|
||||
{%- if tools is not none %}
|
||||
{{- "Environment: ipython\n" }}
|
||||
{%- endif %}
|
||||
{%- if tools is not none and not tools_in_user_message %}
|
||||
{{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- system_message }}
|
||||
{{- "<|eot|>" }}
|
||||
{%- endif %}
|
||||
|
||||
{#- Custom tools are passed in a user message with some extra guidance #}
|
||||
{%- if tools_in_user_message and not tools is none %}
|
||||
{#- Extract the first user message so we can plug it in here #}
|
||||
{%- if messages | length != 0 %}
|
||||
{%- set first_user_message = messages[0]['content']|trim %}
|
||||
{%- set messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
|
||||
{%- endif %}
|
||||
{{- '<|header_start|>user<|header_end|>\n\n' -}}
|
||||
{{- "Given the following functions, please respond with a JSON for a function call " }}
|
||||
{{- "with its proper arguments that best answers the given prompt.\n\n" }}
|
||||
{{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
|
||||
{{- "Do not use variables.\n\n" }}
|
||||
{%- for t in tools %}
|
||||
{{- t | tojson(indent=4) }}
|
||||
{{- "\n\n" }}
|
||||
{%- endfor %}
|
||||
{{- first_user_message + "<|eot|>"}}
|
||||
{%- endif %}
|
||||
|
||||
{%- for message in messages %}
|
||||
{%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
|
||||
{{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] }}
|
||||
{%- else %}
|
||||
{%- for content in message['content'] %}
|
||||
{%- if content['type'] == 'image' %}
|
||||
{{- '<|image|>' }}
|
||||
{%- elif content['type'] == 'text' %}
|
||||
{{- content['text'] }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- "<|eot|>" }}
|
||||
{%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
|
||||
{{- '<|header_start|>assistant<|header_end|>\n\n' -}}
|
||||
{{- '<|python_start|>' }}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] }}
|
||||
{%- else %}
|
||||
{%- for content in message['content'] %}
|
||||
{%- if content['type'] == 'image' %}
|
||||
{{- '<|image|>' }}
|
||||
{%- elif content['type'] == 'text' %}
|
||||
{{- content['text'] }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|python_end|>' }}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{{- '{"name": "' + tool_call.function.name + '", ' }}
|
||||
{{- '"parameters": ' }}
|
||||
{{- tool_call.function.arguments | tojson }}
|
||||
{{- "}" }}
|
||||
{%- endfor %}
|
||||
{{- "<|eot|>" }}
|
||||
{%- elif message.role == "tool" or message.role == "ipython" %}
|
||||
{{- "<|header_start|>ipython<|header_end|>\n\n" }}
|
||||
{%- if message.content is mapping or message.content is iterable %}
|
||||
{{- message.content | tojson }}
|
||||
{%- else %}
|
||||
{{- message.content }}
|
||||
{%- endif %}
|
||||
{{- "<|eot|>" }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|header_start|>assistant<|header_end|>\n\n' }}
|
||||
{%- endif %}
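Unlike the llama3_2_vision template earlier (which raises on more than one tool call per turn), the llama4 template above loops over `message.tool_calls`; a sketch of an assistant turn it would serialize (names and arguments are made up):

```python
# Sketch: an assistant turn with two tool calls, accepted by the llama4 template
# above; the llama3_2_vision template would raise an exception for this.
assistant_turn = {
    "role": "assistant",
    "content": "",
    "tool_calls": [
        {"function": {"name": "get_weather", "arguments": {"city": "Paris"}}},
        {"function": {"name": "get_time", "arguments": {"timezone": "Europe/Paris"}}},
    ],
}
# Each call is rendered as {"name": ..., "parameters": ...} after the
# <|python_start|>...<|python_end|> content block and before the closing <|eot|>.
```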
2
src/axolotl/utils/chat_templates/templates/llava.jinja
Normal file
@@ -0,0 +1,2 @@
{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>
' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}
@@ -0,0 +1 @@
{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = 'Enter RP mode. You shall reply to the user while staying in character. Your responses must be detailed, creative, immersive, and drive the scenario forward.' %}{% endif %}{{ '<|system|>' + system_message }}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>' + content.strip() }}{% elif message['role'] == 'assistant' %}{{ '<|model|>' + content.strip() }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|model|>' }}{% else %}{{ eos_token }}{% endif %}
@@ -0,0 +1 @@
{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
@@ -0,0 +1 @@
{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + '[/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
@@ -0,0 +1 @@
{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST]' + message['content'] + '[/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
@@ -0,0 +1,51 @@
|
||||
{%- set today = strftime_now("%Y-%m-%d") %}
|
||||
{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
|
||||
|
||||
{{- bos_token }}
|
||||
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{%- if messages[0]['content'] is string %}
|
||||
{%- set system_message = messages[0]['content'] %}
|
||||
{%- else %}
|
||||
{%- set system_message = messages[0]['content'][0]['text'] %}
|
||||
{%- endif %}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set system_message = default_system_message %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- endif %}
|
||||
{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
|
||||
|
||||
{%- for message in loop_messages %}
|
||||
{%- if message['role'] == 'user' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[INST]' + message['content'] + '[/INST]' }}
|
||||
{%- else %}
|
||||
{{- '[INST]' }}
|
||||
{%- for block in message['content'] %}
|
||||
{%- if block['type'] == 'text' %}
|
||||
{{- block['text'] }}
|
||||
{%- elif block['type'] in ['image', 'image_url'] %}
|
||||
{{- '[IMG]' }}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only text and image blocks are supported in message content!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- '[/INST]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'system' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- else %}
|
||||
{{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
|
||||
{%- endif %}
|
||||
{%- elif message['role'] == 'assistant' %}
|
||||
{%- if message['content'] is string %}
|
||||
{{- message['content'] + eos_token }}
|
||||
{%- else %}
|
||||
{{- message['content'][0]['text'] + eos_token }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- raise_exception('Only user, system and assistant roles are supported!') }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
7
src/axolotl/utils/chat_templates/templates/phi_3.jinja
Normal file
@@ -0,0 +1,7 @@
{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '
' + message['content'] + '<|end|>' + '
'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '
' + message['content'] + '<|end|>' + '
' + '<|assistant|>' + '
'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '
'}}{% endif %}{% endfor %}
8
src/axolotl/utils/chat_templates/templates/phi_35.jinja
Normal file
@@ -0,0 +1,8 @@
{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>
' + message['content'] + '<|end|>
'}}{% elif message['role'] == 'user' %}{{'<|user|>
' + message['content'] + '<|end|>
'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>
' + message['content'] + '<|end|>
'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>
' }}{% endif %}
1
src/axolotl/utils/chat_templates/templates/phi_4.jinja
Normal file
@@ -0,0 +1 @@
{% set system_message = 'You are Phi, a language model trained by Microsoft to help users. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> {Thought section} </think> {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:' -%}{%- if messages and messages[0]['role'] == 'system' -%}{%- set system_message = messages[0]['content'] -%}{%- set messages = messages[1:] -%}{%- endif -%}<|im_start|>system<|im_sep|>{{ system_message }}<|im_end|>{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>'}}{% generation %}{{message['content'] + '<|im_end|>'}}{% endgeneration %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}
53
src/axolotl/utils/chat_templates/templates/pixtral.jinja
Normal file
@@ -0,0 +1,53 @@
|
||||
{%- if messages[0]["role"] == "system" %}
|
||||
{%- set system_message = messages[0]["content"] %}
|
||||
{%- set loop_messages = messages[1:] %}
|
||||
{%- else %}
|
||||
{%- set loop_messages = messages %}
|
||||
{%- endif %}
|
||||
|
||||
{{- bos_token }}
|
||||
{%- for message in loop_messages %}
|
||||
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
|
||||
{{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}
|
||||
{%- endif %}
|
||||
{%- if message["role"] == "user" %}
|
||||
{%- if loop.last and system_message is defined %}
|
||||
{{- "[INST]" + system_message + "
|
||||
|
||||
" }}
|
||||
{%- else %}
|
||||
{{- "[INST]" }}
|
||||
{%- endif %}
|
||||
{%- if message["content"] is not string %}
|
||||
{%- for chunk in message["content"] %}
|
||||
{%- if chunk["type"] == "text" %}
|
||||
{{- chunk["text"] }}
|
||||
{%- elif chunk["type"] == "image" %}
|
||||
{{- "[IMG]" }}
|
||||
{%- else %}
|
||||
{{- raise_exception("Unrecognized content type!") }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- else %}
|
||||
{{- message["content"] }}
|
||||
{%- endif %}
|
||||
{{- "[/INST]" }}
|
||||
{%- elif message["role"] == "assistant" %}
|
||||
{%- if message["content"] is not string %}
|
||||
{%- for chunk in message["content"] %}
|
||||
{%- if chunk["type"] == "text" %}
|
||||
{{- chunk["text"] }}
|
||||
{%- elif chunk["type"] == "image" %}
|
||||
{{- "[IMG]" }}
|
||||
{%- else %}
|
||||
{{- raise_exception("Unrecognized content type!") }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{{- eos_token }}
|
||||
{%- else %}
|
||||
{{- message["content"] + eos_token }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
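One detail of the pixtral template worth noting: the system message is not emitted as its own turn but folded into the last `[INST]` block; a sketch of messages that exercises this (contents are illustrative):

```python
# Sketch: with the pixtral template above, the system prompt is prepended to the
# final [INST] block rather than rendered as a separate turn.
messages = [
    {"role": "system", "content": "Answer concisely."},
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this image."},
        ],
    },
]
# Expected shape of the rendering (bos token abbreviated):
#   <bos>[INST]Answer concisely.
#
#   [IMG]Describe this image.[/INST]
```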
|
||||
@@ -0,0 +1,7 @@
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
You are a helpful assistant.<|im_end|>
{% endif %}<|im_start|>{{ message['role'] }}
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
{% endif %}
87
src/axolotl/utils/chat_templates/templates/qwen3.jinja
Normal file
@@ -0,0 +1,87 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set content = message.content %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in message.content %}
|
||||
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- else %}
|
||||
{{- '<think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
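The qwen3 template reads an `enable_thinking` flag from the render context; with recent transformers versions extra keyword arguments to `apply_chat_template` are forwarded to the template, so it can be toggled as below (the repo id is an assumption):

```python
# Sketch: toggling Qwen3 "thinking" via the enable_thinking variable read above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")  # assumed repo id
messages = [{"role": "user", "content": "Give me a haiku about rain."}]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # this template then emits an empty <think>\n\n</think> block
)
print(text)
```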
54
src/axolotl/utils/chat_templates/templates/qwen_25.jinja
Normal file
@@ -0,0 +1,54 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- messages[0]['content'] }}
|
||||
{%- else %}
|
||||
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
||||
{%- endif %}
|
||||
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0]['role'] == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{{- '<|im_start|>' + message.role }}
|
||||
{%- if message.content %}
|
||||
{{- '\n' + message.content }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
@@ -108,7 +108,7 @@ class DataCollatorForSeq2Seq:
 pad_to_multiple_of=self.pad_to_multiple_of,
 return_tensors=return_tensors,
 )
-if not has_attn_mask:
+if not has_attn_mask and "attention_mask" in features:
 del features["attention_mask"]

 # prepare decoder_input_ids
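For context on the guard added above, a sketch of the failure mode it avoids when the batch never carried an attention mask (the feature dict is illustrative):

```python
# Sketch of the guard above: only drop "attention_mask" when it is present, so
# collating features that never had one does not raise a KeyError.
features = {"input_ids": [[1, 2, 3]], "labels": [[1, 2, 3]]}
has_attn_mask = False

if not has_attn_mask and "attention_mask" in features:
    del features["attention_mask"]  # no-op here; the unguarded del would have raised
```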
@@ -50,7 +50,7 @@ class MultiModalChatDataCollator(DataCollatorMixin):
 # This method requires transformers>=4.49.0
 result = self.processing_strategy.processor.apply_chat_template(
 example["messages"],
-add_generation_prompt=True,
+add_generation_prompt=False,
 tokenize=True,
 return_tensors="pt",
 padding=True,
@@ -526,8 +526,9 @@ def merge_datasets(datasets: list[Dataset], cfg: DictDefault) -> Dataset:
 if len(datasets) == 1:
 ds = datasets[0]

-# Do not shuffle if curriculum sampling is enabled
-if cfg.curriculum_sampling:
+# Do not shuffle if curriculum sampling is enabled or
+# shuffle_merged_datasets is disabled
+if cfg.curriculum_sampling or not cfg.shuffle_merged_datasets:
 return ds

 return ds.shuffle(seed=cfg.seed)
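The new condition above skips shuffling either when curriculum sampling is enabled or when `shuffle_merged_datasets` is turned off; the decision in isolation (a sketch, values illustrative):

```python
# Sketch of the updated shuffle decision in merge_datasets.
def should_shuffle(curriculum_sampling: bool, shuffle_merged_datasets: bool) -> bool:
    return not curriculum_sampling and shuffle_merged_datasets

assert should_shuffle(False, True) is True    # default behaviour: shuffle
assert should_shuffle(True, True) is False    # curriculum sampling wins
assert should_shuffle(False, False) is False  # shuffle_merged_datasets: false
```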
@@ -3,10 +3,11 @@
 import math
 import os
 from shutil import copyfile
-from typing import TYPE_CHECKING, Optional
+from typing import Optional

 import numpy as np
 from huggingface_hub import hf_hub_download
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
 from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
 from mistral_common.tokens.tokenizers.tekken import SpecialTokenPolicy, Tekkenizer
 from torch import Tensor
@@ -14,9 +15,6 @@ from transformers.utils import PaddingStrategy

 from axolotl.utils.collators.core import IGNORE_INDEX

-if TYPE_CHECKING:
-    from mistral_common.protocol.instruct.request import ChatCompletionRequest
-

 def _get_file_path(path_or_repo_id: str, filename: str) -> str:
     """Get the file path from local or HF Hub"""
@@ -259,75 +257,6 @@ class HFMistralTokenizer:
             token_ids, special_token_policy=SpecialTokenPolicy.KEEP
         )

-    def _create_mistral_chat_completion_request(
-        self, conversation: list[dict], tools: list[dict] | None = None
-    ) -> "ChatCompletionRequest":
-        from mistral_common.protocol.instruct.messages import (
-            AssistantMessage,
-            SystemMessage,
-            ToolMessage,
-            UserMessage,
-        )
-        from mistral_common.protocol.instruct.request import ChatCompletionRequest
-        from mistral_common.protocol.instruct.tool_calls import Function, Tool
-
-        messages: list[UserMessage | AssistantMessage | ToolMessage | SystemMessage] = (
-            []
-        )
-        for turn in conversation:
-            role = turn.get("role")
-
-            if role == "user":
-                messages.append(UserMessage(content=turn["content"]))
-            elif role == "assistant":
-                messages.append(
-                    AssistantMessage(
-                        content=turn.get("content"),
-                        tool_calls=turn.get("tool_calls"),
-                    )
-                )
-            elif role == "tool":
-                messages.append(
-                    ToolMessage(
-                        content=turn.get("content"),
-                        tool_call_id=turn.get("tool_call_id"),
-                        name=turn.get("name"),
-                    )
-                )
-            elif role == "system":
-                messages.append(SystemMessage(content=turn["content"]))
-            else:
-                raise ValueError(
-                    f"Unknown role for use with mistral-common tokenizer: {turn['role']}"
-                )
-
-        tool_calls: list[Tool] = []
-        if tools:
-            # convert to Tool
-            for tool in tools:
-                if tool["type"] != "function":
-                    continue
-
-                function = tool["function"]
-
-                tool_calls.append(
-                    Tool(
-                        function=Function(
-                            name=function["name"],
-                            description=function["description"],
-                            # set parameters to empty dict if not provided
-                            parameters=function.get("parameters", {}),
-                        )
-                    )
-                )
-
-        chat_completion: ChatCompletionRequest = ChatCompletionRequest(
-            messages=messages,
-            tools=tool_calls,
-        )
-
-        return chat_completion
-
     def apply_chat_template(
         self,
         messages: list[dict],
@@ -342,8 +271,8 @@ class HFMistralTokenizer:
         if add_generation_prompt:
            raise NotImplementedError("add_generation_prompt not supported yet")

-        chat_completion: ChatCompletionRequest = (
-            self._create_mistral_chat_completion_request(messages, tools)
+        chat_completion: ChatCompletionRequest = ChatCompletionRequest.from_openai(
+            messages, tools
         )

         tokens: list[int] = self._mistral.encode_chat_completion(chat_completion).tokens
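The replacement call builds the request straight from OpenAI-style message dicts instead of the hand-rolled converter removed above. A small sketch of that flow, assuming mistral-common is installed; `MistralTokenizer.v3()` is used here only to keep the example self-contained, whereas the wrapper in the diff uses whatever tokenizer it was constructed with.

from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# OpenAI-style conversation, as it arrives from the dataset side.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]

# Same call the diff switches to: convert the dicts into a ChatCompletionRequest.
request = ChatCompletionRequest.from_openai(messages)

tokenizer = MistralTokenizer.v3()  # bundled tokenizer, keeps the sketch offline-friendly
tokens = tokenizer.encode_chat_completion(request).tokens
print(len(tokens))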
@@ -408,13 +337,16 @@ class HFMistralTokenizer:
             padding_value=IGNORE_INDEX,
         )

-        attention_mask = torch.nn.utils.rnn.pad_sequence(
-            [torch.tensor(x["attention_mask"], dtype=torch.long) for x in features],
-            batch_first=True,
-            padding_value=0,
-        )
+        attention_mask = None
+        if "attention_mask" in features[0]:
+            attention_mask = torch.nn.utils.rnn.pad_sequence(
+                [torch.tensor(x["attention_mask"], dtype=torch.long) for x in features],
+                batch_first=True,
+                padding_value=0,
+            )

         # Handle position_ids - pad with sequential values for right padding, 0s for left padding
         position_ids = None
         if "position_ids" in features[0]:
             if self.padding_side == "left":
                 # Likely not needed, but keeping for now
@@ -443,22 +375,15 @@ class HFMistralTokenizer:
                    pos_seq = torch.cat([pos_seq, pad_positions])
                    position_ids_list.append(pos_seq)
                position_ids = torch.stack(position_ids_list)
-        else:
-            # Create position_ids if not present
-            seq_len = input_ids.size(1)
-            position_ids = (
-                torch.arange(seq_len, dtype=torch.long)
-                .unsqueeze(0)
-                .expand(input_ids.size(0), -1)
-            )

         # Ensure all tensors have the same sequence length
-        max_seq_len = max(
-            input_ids.size(1),
-            labels.size(1),
-            attention_mask.size(1),
-            position_ids.size(1),
-        )
+        # Check attention mask and position ids if they are present
+        tensor_lengths = [input_ids.size(1), labels.size(1)]
+        if attention_mask is not None:
+            tensor_lengths.append(attention_mask.size(1))
+        if position_ids is not None:
+            tensor_lengths.append(position_ids.size(1))
+        max_seq_len = max(tensor_lengths)

         # TODO: check if trimming is needed? and correct.
@@ -492,44 +417,48 @@ class HFMistralTokenizer:
         elif labels.size(1) > max_seq_len:
             labels = labels[:, :max_seq_len]

-        if attention_mask.size(1) < max_seq_len:
-            pad_len = max_seq_len - attention_mask.size(1)
-            if self.padding_side == "right":
-                attention_mask = F.pad(attention_mask, (0, pad_len), value=0)
-            else:
-                attention_mask = F.pad(attention_mask, (pad_len, 0), value=0)
-        elif attention_mask.size(1) > max_seq_len:
-            attention_mask = attention_mask[:, :max_seq_len]
+        if attention_mask is not None:
+            if attention_mask.size(1) < max_seq_len:
+                pad_len = max_seq_len - attention_mask.size(1)
+                if self.padding_side == "right":
+                    attention_mask = F.pad(attention_mask, (0, pad_len), value=0)
+                else:
+                    attention_mask = F.pad(attention_mask, (pad_len, 0), value=0)
+            elif attention_mask.size(1) > max_seq_len:
+                attention_mask = attention_mask[:, :max_seq_len]

-        if position_ids.size(1) < max_seq_len:
-            pad_len = max_seq_len - position_ids.size(1)
-            if self.padding_side == "right":
-                batch_size = position_ids.size(0)
-                new_position_ids = []
-                for i in range(batch_size):
-                    seq = position_ids[i]
-                    if len(seq) > 0:
-                        # get last position and pad with sequential values
-                        last_pos = seq[-1].item()
-                        pad_positions = torch.arange(
-                            last_pos + 1, last_pos + 1 + pad_len, dtype=torch.long
-                        )
-                        new_seq = torch.cat([seq, pad_positions])
-                    else:
-                        new_seq = torch.arange(pad_len, dtype=torch.long)
-                    new_position_ids.append(new_seq)
-                position_ids = torch.stack(new_position_ids)
-            else:
-                position_ids = F.pad(position_ids, (pad_len, 0), value=0)
-        elif position_ids.size(1) > max_seq_len:
-            position_ids = position_ids[:, :max_seq_len]
+        if position_ids is not None:
+            if position_ids.size(1) < max_seq_len:
+                pad_len = max_seq_len - position_ids.size(1)
+                if self.padding_side == "right":
+                    batch_size = position_ids.size(0)
+                    new_position_ids = []
+                    for i in range(batch_size):
+                        seq = position_ids[i]
+                        if len(seq) > 0:
+                            # get last position and pad with sequential values
+                            last_pos = seq[-1].item()
+                            pad_positions = torch.arange(
+                                last_pos + 1, last_pos + 1 + pad_len, dtype=torch.long
+                            )
+                            new_seq = torch.cat([seq, pad_positions])
+                        else:
+                            new_seq = torch.arange(pad_len, dtype=torch.long)
+                        new_position_ids.append(new_seq)
+                    position_ids = torch.stack(new_position_ids)
+                else:
+                    position_ids = F.pad(position_ids, (pad_len, 0), value=0)
+            elif position_ids.size(1) > max_seq_len:
+                position_ids = position_ids[:, :max_seq_len]

         final_batch = {
             "input_ids": input_ids,
             "labels": labels,
-            "attention_mask": attention_mask,
-            "position_ids": position_ids,
         }
+        if attention_mask is not None:
+            final_batch["attention_mask"] = attention_mask
+        if position_ids is not None:
+            final_batch["position_ids"] = position_ids

         # Handle non-sequence fields (raise error)
         sequence_fields = {"input_ids", "labels", "attention_mask", "position_ids"}
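Taken together, these collator hunks switch to a pad-only-what-is-present pattern: input_ids and labels are always padded, while attention_mask and position_ids are padded and emitted only when the features actually carry them. A short sketch of that pattern in isolation; the feature keys, padding values, and IGNORE_INDEX sentinel follow the diff, everything else is illustrative.

import torch
from torch.nn.utils.rnn import pad_sequence

IGNORE_INDEX = -100  # sentinel for masked label positions, as in the collator

features = [
    {"input_ids": [1, 2, 3], "labels": [1, 2, 3]},
    {"input_ids": [4, 5], "labels": [IGNORE_INDEX, 5]},
]

input_ids = pad_sequence(
    [torch.tensor(f["input_ids"], dtype=torch.long) for f in features],
    batch_first=True,
    padding_value=0,
)
labels = pad_sequence(
    [torch.tensor(f["labels"], dtype=torch.long) for f in features],
    batch_first=True,
    padding_value=IGNORE_INDEX,
)

# Optional fields are only padded (and only emitted) when present in the features.
attention_mask = None
if "attention_mask" in features[0]:
    attention_mask = pad_sequence(
        [torch.tensor(f["attention_mask"], dtype=torch.long) for f in features],
        batch_first=True,
        padding_value=0,
    )

batch = {"input_ids": input_ids, "labels": labels}
if attention_mask is not None:
    batch["attention_mask"] = attention_mask

print({k: v.shape for k, v in batch.items()})  # both tensors are (2, 3)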
@@ -545,7 +474,7 @@ class HFMistralTokenizer:
            result = {}
            for k, v in final_batch.items():
                if isinstance(v, torch.Tensor):
-                    result[k] = v.numpy().astype(np.long)
+                    result[k] = v.numpy().astype(np.int64)
                else:
                    result[k] = v
            return result
@@ -203,7 +203,7 @@ class AxolotlInputConfig(
         },
     )
     dataset_processes: int | None = Field(
-        default=min(32, os.cpu_count()),  # type: ignore[type-var]
+        default=min(int(os.environ.get("AXOLOTL_DATASET_PROCESSES", 32)), os.cpu_count()),  # type: ignore[type-var]
         json_schema_extra={
             "description": "The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()` if not set."
         },
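The new default caps the preprocessing process count with an optional environment variable. A quick illustration of how the value resolves; the variable name comes from the diff, the numbers are arbitrary.

import os

os.environ["AXOLOTL_DATASET_PROCESSES"] = "8"  # optional override, e.g. on shared machines
dataset_processes = min(int(os.environ.get("AXOLOTL_DATASET_PROCESSES", 32)), os.cpu_count())
print(dataset_processes)  # 8 on a machine with at least 8 cores, otherwise the core count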
@@ -549,6 +549,20 @@ class AxolotlInputConfig(
         },
     )

+    tiled_mlp: bool | None = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Whether to use ALST tiled mlp for memory efficient long context"
+        },
+    )
+
+    tiled_mlp_num_shards: int | None = Field(
+        default=None,
+        json_schema_extra={
+            "description": "Number of shards to use for ALST tiled mlp. If unset, it will be set based on seqlen/hidden_size"
+        },
+    )
+
     llama4_linearized_experts: bool | None = None

     deepspeed: str | dict[str, Any] | None = Field(
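For orientation, a hedged sketch of how the new options could appear in a training config. The keys match the fields above; the values and the deepspeed path are placeholders, not a recommended recipe.

# Config fragment expressed as a Python dict; in an axolotl YAML config these
# would be top-level keys with the same names.
cfg_fragment = {
    "tiled_mlp": True,           # enable ALST tiled MLP for long-context memory savings
    "tiled_mlp_num_shards": 4,   # optional; derived from seqlen/hidden_size when unset
    "deepspeed": "deepspeed_configs/zero3.json",  # multi-GPU runs require ZeRO per the validator added later in this diff
}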
@@ -613,7 +627,7 @@ class AxolotlInputConfig(
     torch_compile: Literal["auto"] | bool | None = Field(
         default=None,
         json_schema_extra={
-            "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.5.1"
+            "description": "Whether to use torch.compile and which backend to use. setting to `auto` will enable torch compile when torch>=2.6.0"
         },
     )
     torch_compile_backend: str | None = Field(
@@ -782,7 +796,7 @@ class AxolotlInputConfig(
     chat_template_jinja: str | None = Field(
         default=None,
         json_schema_extra={
-            "description": "Custom jinja template for chat template. This will be only used if chat_template is set to `jinja` or `null` (in which case chat_template is automatically set to `jinja`). Default is null."
+            "description": "Custom jinja template or path to jinja file for chat template. This will be only used if chat_template is set to `jinja` or `null` (in which case chat_template is automatically set to `jinja`). Default is null."
         },
     )
     chat_template_kwargs: dict[str, Any] | None = Field(
@@ -1069,9 +1083,9 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
     def check_min_torch_version(self):
         if self.env_capabilities and self.env_capabilities.torch_version:
             torch_version = self.env_capabilities.torch_version
-            if version.parse(torch_version) < version.parse("2.5.1"):
+            if version.parse(torch_version) < version.parse("2.6.0"):
                 LOG.warning(
-                    f"torch=={torch_version} may not be supported in future versions. Please consider upgrading to torch>=2.5.1."
+                    f"torch=={torch_version} is not supported. Please upgrade to torch>=2.6.0."
                 )

         return self
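A self-contained illustration of the comparison this validator now performs, using packaging.version as the config code does; the installed version string is a stand-in.

from packaging import version

installed = "2.5.1"  # stand-in for env_capabilities.torch_version
if version.parse(installed) < version.parse("2.6.0"):
    print(f"torch=={installed} is not supported. Please upgrade to torch>=2.6.0.")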
@@ -1114,3 +1128,17 @@ class AxolotlConfigWCapabilities(AxolotlInputConfig):
                 raise ValueError("QAT is not supported on torch version < 2.6.0")

         return data
+
+    @model_validator(mode="before")
+    @classmethod
+    def default_dataloader_opts(cls, data):
+        if (
+            data.get("dataloader_num_workers") is None
+            and data.get("dataloader_pin_memory") is None
+            and data.get("dataloader_prefetch_factor") is None
+        ):
+            data["dataloader_num_workers"] = data.get("capabilities").get("n_gpu", 1)
+            data["dataloader_pin_memory"] = True
+            data["dataloader_prefetch_factor"] = 256
+
+        return data
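In effect, the new validator only fills in dataloader defaults when none of the three options were set by the user. A standalone sketch of that behavior; unlike the validator, this sketch guards against a missing capabilities dict so it runs on its own.

def apply_dataloader_defaults(data: dict) -> dict:
    """Mirror of the defaulting logic: applies only when all three options are unset."""
    if (
        data.get("dataloader_num_workers") is None
        and data.get("dataloader_pin_memory") is None
        and data.get("dataloader_prefetch_factor") is None
    ):
        data["dataloader_num_workers"] = data.get("capabilities", {}).get("n_gpu", 1)
        data["dataloader_pin_memory"] = True
        data["dataloader_prefetch_factor"] = 256
    return data


print(apply_dataloader_defaults({"capabilities": {"n_gpu": 2}}))
print(apply_dataloader_defaults({"dataloader_num_workers": 4}))  # left untouched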
@@ -89,7 +89,7 @@ class SFTDataset(BaseModel):
     chat_template_jinja: str | None = Field(
         default=None,
         json_schema_extra={
-            "description": "Custom jinja chat template. Used only if `chat_template: jinja` or empty."
+            "description": "Custom jinja chat template or path to jinja file. Used only if `chat_template: jinja` or empty."
         },
     )
     data_files: str | list[str] | None = Field(
@@ -476,6 +476,19 @@ class TrainingValidationMixin:

         return data

+    @model_validator(mode="before")
+    @classmethod
+    def check_tiled_mlp_deepspeed(cls, data):
+        capabilities = data.get("capabilities")
+        n_gpu = 0
+        if capabilities and capabilities.get("n_gpu", 0) >= 1:
+            n_gpu = capabilities.get("n_gpu", 0)
+        if data.get("tiled_mlp", False) and (n_gpu > 1 and not data.get("deepspeed")):
+            raise ValueError(
+                "tiled_mlp requires deepspeed ZeRO to be enabled for multi-gpu"
+            )
+        return data
+

 class LoRAValidationMixin:
     """Validation methods related to LoRA/QLoRA configuration."""
@@ -535,6 +535,9 @@ def setup_deepspeed_env(cfg, stage=None):

     os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
     os.environ["ACCELERATE_DEEPSPEED_CONFIG_FILE"] = cfg.deepspeed
+    os.environ["ACCELERATE_GRADIENT_ACCUMULATION_STEPS"] = str(
+        cfg.gradient_accumulation_steps
+    )
     if stage:
         os.environ["ACCELERATE_DEEPSPEED_ZERO_STAGE"] = str(stage)
         if stage == 3:
@@ -543,6 +546,15 @@ def setup_deepspeed_env(cfg, stage=None):
     # NOTE(djsaunde): The distributed state cannot be initialized prior to the
     # ACCELERATE_USE_DEEPSPEED assignment, but it must be initialized some time prior
     # to model load.
+    if int(os.environ.get("WORLD_SIZE", "1")) == 1:
+        os.environ["WORLD_SIZE"] = "1"  # force it in case not set
+        os.environ["LOCAL_RANK"] = "0"  # force it in case not set
+        os.environ["RANK"] = os.environ.get("LOCAL_RANK", "0")
+        import deepspeed.comm as dist
+
+        dist.init_distributed(
+            dist_backend="nccl", auto_mpi_discovery=False, dist_init_required=True
+        )
     init_distributed_state()

     # If we don't assign this, it doesn't actually get set in the accelerate weakref
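The single-GPU branch above pins the usual launcher environment variables before bringing up the DeepSpeed communicator. A hedged sketch of the environment it establishes; the MASTER_ADDR/MASTER_PORT lines are an assumption added to keep the standalone example runnable and are not part of the diff.

import os

# Mimic what the new branch guarantees for a single-process run before
# deepspeed.comm.init_distributed() would be called.
if int(os.environ.get("WORLD_SIZE", "1")) == 1:
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")  # assumption: the backend still needs a rendezvous address
    os.environ.setdefault("MASTER_PORT", "29500")      # assumption: default torch distributed port
    os.environ["WORLD_SIZE"] = "1"
    os.environ["LOCAL_RANK"] = "0"
    os.environ["RANK"] = "0"

print({k: os.environ[k] for k in ("WORLD_SIZE", "LOCAL_RANK", "RANK")})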
@@ -609,6 +621,9 @@ def prepare_opinionated_env(cfg):
     if cfg.qlora_sharded_model_loading:
         # model loading is forked after the tokenizer
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    if cfg.sample_packing:
+        # multipack parallel packing sampler defaults to using fork
+        os.environ["TOKENIZERS_PARALLELISM"] = "false"


 def setup_trainer(
@@ -10,12 +10,13 @@ import shutil
 import sys
 import tempfile
 import time
-from pathlib import Path, PosixPath
+from pathlib import Path
 from typing import Generator

 import datasets
 import pytest
 import requests
+import torch
 from huggingface_hub import snapshot_download
 from huggingface_hub.errors import LocalEntryNotFoundError
 from tokenizers import AddedToken
@@ -424,8 +425,8 @@ def temp_dir() -> Generator[str, None, None]:


 @pytest.fixture(scope="function", autouse=True)
-def unique_triton_cache_dir(temp_dir: str | PosixPath) -> None:
-    os.environ["TRITON_CACHE_DIR"] = str(temp_dir) + "/.triton/cache"
+def torch_manual_seed():
+    torch.manual_seed(42)


 @pytest.fixture(scope="function", autouse=True)
@@ -19,8 +19,15 @@ def test_geglu_forward_shape():
     assert out.device == gate.device


-def test_geglu_forward_values():
+@pytest.mark.flaky(retries=1, delay=5)
+@pytest.mark.parametrize(
+    "torch_seed",
+    [0, 42],
+)
+def test_geglu_forward_values(torch_seed):
     """Test GEGLU forward pass matches PyTorch reference implementation."""
+    torch.manual_seed(torch_seed)

     gate = torch.randn(2, 3, 64, device="cuda")
     up = torch.randn(2, 3, 64, device="cuda")
@@ -33,6 +40,7 @@ def test_geglu_forward_values():
     assert torch.allclose(triton_out, torch_out, rtol=1e-3)


+@pytest.mark.flaky(retries=1, delay=5)
 @pytest.mark.parametrize(
     "torch_seed",
     [0, 42],
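Note that `pytest.mark.flaky(retries=..., delay=...)` is not built into pytest itself; the keyword names match the pytest-retry plugin, which the suite presumably depends on. A minimal CPU-only sketch of the seeded, retried test pattern the hunks introduce:

import pytest
import torch


@pytest.mark.flaky(retries=1, delay=5)  # re-run once on failure (pytest-retry style marker)
@pytest.mark.parametrize("torch_seed", [0, 42])
def test_values_are_deterministic(torch_seed):
    """Seeding makes the random inputs reproducible across retries and runs."""
    torch.manual_seed(torch_seed)
    a = torch.randn(4, 4)
    torch.manual_seed(torch_seed)
    b = torch.randn(4, 4)
    assert torch.equal(a, b)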
@@ -104,7 +104,7 @@ class TestSequenceParallelism:
             (True, 1, True, None, 2.5),  # defaults to varlen_llama3 ring_attn_func
             (False, 2, True, None, 2.5),  # defaults to batch_ring ring_attn_func
             # (False, 2, True, "batch_zigzag", 2.5),
-            (False, 2, False, None, 2.5),  # defaults to batch_ring ring_attn_func
+            (False, 2, False, None, 2.65),  # defaults to batch_ring ring_attn_func
         ],
         ids=[
             "sample_packing, varlen_llama3 ring_attn_func",
@@ -86,5 +86,5 @@ class TestPackedFlex:
         )

         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss (%s) is too high"
+            temp_dir + "/runs", "train/train_loss", 2.1, "Train Loss (%s) is too high"
         )
@@ -90,7 +90,7 @@ class TestMultiGPULlama:
         )

         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss (%s) is too high"
+            temp_dir + "/runs", "train/train_loss", 2.8, "Train Loss (%s) is too high"
         )

     @pytest.mark.parametrize(
@@ -364,6 +364,7 @@ class TestMultiGPULlama:
                     "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
                 },
                 "use_tensorboard": True,
+                "seed": 42,
             }
         )

@@ -759,6 +760,7 @@ class TestMultiGPULlama:
                 "flash_attention": True,
                 "deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero2.json"),
                 "use_tensorboard": True,
+                "seed": 42,
                 **adapter,
             }
         )
@@ -856,7 +858,7 @@ class TestMultiGPULlama:
         )

         check_tensorboard(
-            temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss (%s) is too high"
+            temp_dir + "/runs", "train/train_loss", 2.5, "Train Loss (%s) is too high"
         )

     @pytest.mark.skip(
Some files were not shown because too many files have changed in this diff.