Compare commits
8 Commits
v0.13.0
...
transforme
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dcd916b29b | ||
|
|
c6ddcdd06a | ||
|
|
7fb6a947d9 | ||
|
|
b234532d9f | ||
|
|
8990ca3205 | ||
|
|
006f226270 | ||
|
|
0b635e69c5 | ||
|
|
0d27e14e45 |
8
.github/workflows/base.yml
vendored
8
.github/workflows/base.yml
vendored
@@ -57,14 +57,14 @@ jobs:
|
||||
cuda_version: 12.8.1
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
pytorch: 2.9.1
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||
dockerfile: "Dockerfile-base"
|
||||
- cuda: "130"
|
||||
cuda_version: 13.0.0
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
pytorch: 2.9.1
|
||||
torch_cuda_arch_list: "9.0+PTX"
|
||||
dockerfile: "Dockerfile-base"
|
||||
# - cuda: "128"
|
||||
@@ -146,14 +146,14 @@ jobs:
|
||||
cuda_version: 12.8.1
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
pytorch: 2.9.1
|
||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||
dockerfile: "Dockerfile-uv-base"
|
||||
- cuda: "130"
|
||||
cuda_version: 13.0.0
|
||||
cudnn_version: ""
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
pytorch: 2.9.1
|
||||
torch_cuda_arch_list: "9.0+PTX"
|
||||
dockerfile: "Dockerfile-uv-base"
|
||||
steps:
|
||||
|
||||
20
.github/workflows/main.yml
vendored
20
.github/workflows/main.yml
vendored
@@ -36,6 +36,16 @@ jobs:
|
||||
pytorch: 2.8.0
|
||||
axolotl_extras:
|
||||
is_latest: true
|
||||
- cuda: 128
|
||||
cuda_version: 12.8.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
axolotl_extras:
|
||||
- cuda: 128
|
||||
cuda_version: 12.8.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.1
|
||||
axolotl_extras:
|
||||
runs-on: axolotl-gpu-runner
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -109,6 +119,16 @@ jobs:
|
||||
pytorch: 2.8.0
|
||||
axolotl_extras:
|
||||
is_latest: true
|
||||
- cuda: 128
|
||||
cuda_version: 12.8.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.0
|
||||
axolotl_extras:
|
||||
- cuda: 128
|
||||
cuda_version: 12.8.1
|
||||
python_version: "3.11"
|
||||
pytorch: 2.9.1
|
||||
axolotl_extras:
|
||||
runs-on: axolotl-gpu-runner
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
@@ -11,13 +11,13 @@ repos:
|
||||
- id: no-commit-to-branch
|
||||
args: ['--branch', 'main']
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.14.3
|
||||
rev: v0.14.7
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix]
|
||||
- id: ruff-format
|
||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||
rev: v1.18.2
|
||||
rev: v1.19.0
|
||||
hooks:
|
||||
- id: mypy
|
||||
additional_dependencies:
|
||||
@@ -26,7 +26,7 @@ repos:
|
||||
'pydantic>=2.5.3',
|
||||
]
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.8.6
|
||||
rev: 1.9.2
|
||||
hooks:
|
||||
- id: bandit
|
||||
args: [
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
|
||||
## 🎉 Latest Updates
|
||||
|
||||
- 2025/11: Axolotl now includes support for [Olmo3](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/olmo3).
|
||||
- 2025/10: New model support has been added in Axolotl for: [Qwen3 Next](https://github.com/axolotl-ai-cloud/axolotl/blob/main/examples/qwen3-next), [Qwen2.5-vl, Qwen3-vl](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen2_5-vl), [Qwen3, Qwen3MoE](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/qwen3), [Granite 4](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/granite4), [HunYuan](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/hunyuan), [Magistral 2509](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/magistral#vision), [Apertus](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/apertus), and [Seed-OSS](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples/seed-oss).
|
||||
- 2025/09: Axolotl now has text diffusion training. Read more [here](https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/diffusion).
|
||||
- 2025/08: QAT has been updated to include NVFP4 support. See [PR](https://github.com/axolotl-ai-cloud/axolotl/pull/3107).
|
||||
|
||||
@@ -51,7 +51,7 @@ RUN git lfs install --skip-repo && \
|
||||
pip3 install -U --no-cache-dir pydantic==1.10.10 && \
|
||||
pip3 cache purge
|
||||
|
||||
RUN if [ "$PYTORCH_VERSION" = "2.9.0" ] && [ "$CUDA" = "128" ] ; then \
|
||||
RUN if [ "$PYTORCH_VERSION" = "2.9.1" ] && [ "$CUDA" = "128" ] ; then \
|
||||
wget https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.4.17/flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
|
||||
pip3 install --no-cache-dir flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
|
||||
rm flash_attn-2.8.3+cu128torch2.9-cp311-cp311-linux_x86_64.whl; \
|
||||
|
||||
@@ -4,7 +4,7 @@ format:
|
||||
html:
|
||||
toc: true
|
||||
toc-depth: 3
|
||||
number-sections: true
|
||||
# number-sections: true
|
||||
code-tools: true
|
||||
execute:
|
||||
enabled: false
|
||||
@@ -14,12 +14,18 @@ This guide covers advanced training configurations for multi-GPU setups using Ax
|
||||
|
||||
## Overview {#sec-overview}
|
||||
|
||||
Axolotl supports several methods for multi-GPU training:
|
||||
When training on multiple GPUs, Axolotl supports 3 sharding/parallelism strategies. Additionally, you can layer specific optimization features on top of that strategy.
|
||||
|
||||
- DeepSpeed (recommended)
|
||||
- FSDP (Fully Sharded Data Parallel)
|
||||
- Sequence parallelism
|
||||
- FSDP + QLoRA
|
||||
You generally cannot combine these strategies; they are mutually exclusive.
|
||||
|
||||
1. **DeepSpeed**: Powerful optimization library, supports ZeRO stages 1-3.
|
||||
2. **FSDP (Fully Sharded Data Parallel)**: PyTorch's native sharding implementation (Recommended).
|
||||
3. **DDP (Distributed Data Parallel)**: PyTorch's native parallelism implementation (Default if neither of the above is selected).
|
||||
|
||||
These features can often be combined with the strategies above:
|
||||
|
||||
* **Sequence Parallelism**: Splits long sequences across GPUs (Compatible with DDP, DeepSpeed, and FSDP).
|
||||
* **FSDP + QLoRA**: Combines 4-bit quantization with FSDP (Specific to FSDP).
|
||||
|
||||
## DeepSpeed {#sec-deepspeed}
|
||||
|
||||
@@ -65,12 +71,18 @@ Start from Stage 1 -> Stage 2 -> Stage 3.
|
||||
|
||||
## Fully Sharded Data Parallel (FSDP) {#sec-fsdp}
|
||||
|
||||
FSDP allows you to shard model parameters, gradients, and optimizer states across data parallel workers.
|
||||
|
||||
::: {.callout-note}
|
||||
|
||||
FSDP2 is recommended for new users. FSDP1 is deprecated and will be removed in an upcoming release of Axolotl.
|
||||
|
||||
:::
|
||||
|
||||
### FSDP + QLoRA {#sec-fsdp-qlora}
|
||||
|
||||
For combining FSDP with QLoRA, see our [dedicated guide](fsdp_qlora.qmd).
|
||||
|
||||
### Migrating from FSDP1 to FSDP2 {#sec-migrate-fsdp1-fsdp2}
|
||||
|
||||
To migrate your config from FSDP1 to FSDP2, you must use the `fsdp_version` top-level config field to specify the FSDP version, and
|
||||
@@ -145,10 +157,6 @@ single sequence causes OOM errors during model training.
|
||||
|
||||
See our [dedicated guide](sequence_parallelism.qmd) for more information.
|
||||
|
||||
### FSDP + QLoRA {#sec-fsdp-qlora}
|
||||
|
||||
For combining FSDP with QLoRA, see our [dedicated guide](fsdp_qlora.qmd).
|
||||
|
||||
## Performance Optimization {#sec-performance}
|
||||
|
||||
### Liger Kernel Integration {#sec-liger}
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
"%%capture\n",
|
||||
"# This step can take ~5-10 minutes to install dependencies\n",
|
||||
"!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
|
||||
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec\""
|
||||
"!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953\""
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
46
examples/olmo3/README.md
Normal file
46
examples/olmo3/README.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# Finetune Allenai's Olmo 3 with Axolotl
|
||||
|
||||
[Olmo 3](https://huggingface.co/collections/allenai/olmo-3) is a family of open-source 7B and 32B models trained by the Allen Institute for Artificial Intelligence.
|
||||
|
||||
This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
||||
|
||||
Here is an example of how to install from pip:
|
||||
```bash
|
||||
# Ensure you have a compatible version of Pytorch installed
|
||||
pip3 install packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
|
||||
# Install Cut Cross Entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
```
|
||||
|
||||
2. Run the finetuning example:
|
||||
|
||||
```bash
|
||||
axolotl train examples/olmo3/olmo3-7b-qlora.yaml
|
||||
```
|
||||
|
||||
Let us know how it goes. Happy finetuning! 🚀
|
||||
|
||||
### TIPS
|
||||
|
||||
- The example config can be re-used for Olmo and Olmo 2.
|
||||
- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config.
|
||||
- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
|
||||
- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
|
||||
|
||||
## Optimization Guides
|
||||
|
||||
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||
|
||||
## Related Resources
|
||||
|
||||
- [Olmo 3 Blog](https://allenai.org/blog/olmo3)
|
||||
- [Axolotl Docs](https://docs.axolotl.ai)
|
||||
- [Axolotl Website](https://axolotl.ai)
|
||||
- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
|
||||
- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
|
||||
64
examples/olmo3/olmo3-7b-qlora.yaml
Normal file
64
examples/olmo3/olmo3-7b-qlora.yaml
Normal file
@@ -0,0 +1,64 @@
|
||||
base_model: allenai/Olmo-3-7B-Instruct-SFT
|
||||
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
plugins:
|
||||
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
datasets:
|
||||
- path: fozziethebeat/alpaca_messages_2k_test
|
||||
type: chat_template
|
||||
|
||||
dataset_prepared_path: last_run_prepared
|
||||
val_set_size: 0.1
|
||||
output_dir: ./outputs/lora-out
|
||||
|
||||
adapter: qlora
|
||||
lora_model_dir:
|
||||
|
||||
sequence_len: 2048
|
||||
sample_packing: true
|
||||
|
||||
lora_r: 32
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.05
|
||||
lora_target_linear: true
|
||||
lora_target_modules:
|
||||
- gate_proj
|
||||
- down_proj
|
||||
- up_proj
|
||||
- q_proj
|
||||
- v_proj
|
||||
- k_proj
|
||||
- o_proj
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_name:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
micro_batch_size: 2
|
||||
num_epochs: 1
|
||||
optimizer: adamw_bnb_8bit
|
||||
lr_scheduler: cosine
|
||||
learning_rate: 0.0002
|
||||
|
||||
bf16: auto
|
||||
tf32: false
|
||||
|
||||
gradient_checkpointing: true
|
||||
resume_from_checkpoint:
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
|
||||
warmup_ratio: 0.1
|
||||
evals_per_epoch: 1
|
||||
saves_per_epoch: 1
|
||||
|
||||
# save_first_step: true # uncomment this to validate checkpoint saving works with your config
|
||||
@@ -6,21 +6,17 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as Seed-OSS is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).
|
||||
1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
|
||||
|
||||
Here is an example of how to install from main for pip:
|
||||
Here is an example of how to install from pip:
|
||||
```bash
|
||||
# Ensure you have a compatible version of Pytorch installed
|
||||
pip3 install packaging setuptools wheel ninja
|
||||
pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
|
||||
|
||||
```bash
|
||||
# Ensure you have Pytorch installed (Pytorch 2.6.0 min)
|
||||
git clone https://github.com/axolotl-ai-cloud/axolotl.git
|
||||
cd axolotl
|
||||
|
||||
pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
|
||||
pip3 install --no-build-isolation -e '.[flash-attn]'
|
||||
|
||||
# Install Cut Cross Entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
```
|
||||
# Install Cut Cross Entropy
|
||||
python scripts/cutcrossentropy_install.py | sh
|
||||
```
|
||||
|
||||
2. Run the finetuning example:
|
||||
|
||||
@@ -41,9 +37,7 @@ Let us know how it goes. Happy finetuning! 🚀
|
||||
|
||||
## Optimization Guides
|
||||
|
||||
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
||||
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
||||
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
||||
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||
|
||||
## Related Resources
|
||||
|
||||
|
||||
@@ -37,9 +37,7 @@ This guide shows how to fine-tune SmolVLM2 models with Axolotl.
|
||||
|
||||
## Optimization Guides
|
||||
|
||||
- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
|
||||
- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
|
||||
- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
|
||||
Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html).
|
||||
|
||||
## Related Resources
|
||||
|
||||
|
||||
@@ -11,9 +11,9 @@ liger-kernel==0.6.3
|
||||
packaging==23.2
|
||||
|
||||
huggingface_hub>=0.36.0
|
||||
peft>=0.17.1
|
||||
peft>=0.18.0
|
||||
tokenizers>=0.22.1
|
||||
transformers==4.57.1
|
||||
transformers==4.57.3
|
||||
accelerate==1.11.0
|
||||
datasets==4.4.1
|
||||
deepspeed>=0.17.0
|
||||
@@ -42,7 +42,6 @@ numpy>=2.2.6
|
||||
# qlora things
|
||||
evaluate==0.4.1
|
||||
scipy
|
||||
scikit-learn==1.4.2
|
||||
nvidia-ml-py==12.560.30
|
||||
art
|
||||
tensorboard
|
||||
|
||||
@@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else ""
|
||||
|
||||
print(
|
||||
UNINSTALL_PREFIX
|
||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"'
|
||||
+ f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"'
|
||||
)
|
||||
|
||||
@@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh
|
||||
|
||||
- If you are installing from pip
|
||||
```bash
|
||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"
|
||||
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"
|
||||
```
|
||||
|
||||
## Usage
|
||||
@@ -65,6 +65,9 @@ plugins:
|
||||
- mistral3
|
||||
- mixtral
|
||||
- mllama
|
||||
- olmo
|
||||
- olmo2
|
||||
- olmo3
|
||||
- phi
|
||||
- phi3
|
||||
- phi4_multimodal
|
||||
|
||||
@@ -35,7 +35,7 @@ LOG = get_logger(__name__)
|
||||
|
||||
_CCE_INSTALL_MESSAGE = (
|
||||
"Please install Axolotl's fork of cut_cross_entropy with transformers support using "
|
||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@8a1a0ec"`'
|
||||
'`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"`'
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -102,6 +102,8 @@ def load_lora(
|
||||
lora_config_kwargs["layer_replication"] = cfg.peft_layer_replication
|
||||
if cfg.peft_trainable_token_indices:
|
||||
lora_config_kwargs["trainable_token_indices"] = cfg.peft_trainable_token_indices
|
||||
if cfg.peft_ensure_weight_tying is not None:
|
||||
lora_config_kwargs["ensure_weight_tying"] = cfg.peft_ensure_weight_tying
|
||||
|
||||
# Determine the correct PEFT task type
|
||||
model_cls = type(model).__name__
|
||||
|
||||
@@ -49,6 +49,9 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
|
||||
"seed_oss",
|
||||
"lfm2",
|
||||
"lfm2_moe",
|
||||
"olmo",
|
||||
"olmo2",
|
||||
"olmo3",
|
||||
]
|
||||
|
||||
|
||||
|
||||
126
src/axolotl/utils/chat_templates/templates/exaone4.jinja
Normal file
126
src/axolotl/utils/chat_templates/templates/exaone4.jinja
Normal file
@@ -0,0 +1,126 @@
|
||||
{%- if not skip_think is defined %}
|
||||
{%- set skip_think = true %}
|
||||
{%- endif %}
|
||||
{%- set role_indicators = {
|
||||
'user': '[|user|]\n',
|
||||
'assistant': '[|assistant|]\n',
|
||||
'system': '[|system|]\n',
|
||||
'tool': '[|tool|]\n'
|
||||
} %}
|
||||
{%- set end_of_turn = '[|endofturn|]\n' %}
|
||||
{%- macro available_tools(tools) %}
|
||||
{{- "# Available Tools" }}
|
||||
{{- "\nYou can use none, one, or multiple of the following tools by calling them as functions to help with the user’s query." }}
|
||||
{{- "\nHere are the tools available to you in JSON format within <tool> and </tool> tags:\n" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "<tool>" }}
|
||||
{{- tool | tojson(ensure_ascii=False) | safe }}
|
||||
{{- "</tool>\n" }}
|
||||
{%- endfor %}
|
||||
{{- "\nFor each function call you want to make, return a JSON object with function name and arguments within <tool_call> and </tool_call> tags, like:" }}
|
||||
{{- "\n<tool_call>{\"name\": function_1_name, \"arguments\": {argument_1_name: argument_1_value, argument_2_name: argument_2_value}}</tool_call>" }}
|
||||
{{- "\n<tool_call>{\"name\": function_2_name, \"arguments\": {...}}</tool_call>\n..." }}
|
||||
{{- "\nNote that if no argument name is specified for a tool, you can just print the argument value directly, without the argument name or JSON formatting." }}
|
||||
{%- endmacro %}
|
||||
{%- set ns = namespace(last_query_index = messages|length - 1) %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.role == "user" and message.content is string %}
|
||||
{%- set ns.last_query_index = loop.index0 -%}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for i in range(messages | length) %}
|
||||
{%- set msg = messages[i] %}
|
||||
{%- set role = msg.role %}
|
||||
{%- if role not in role_indicators %}
|
||||
{{- raise_exception('Unknown role: ' ~ role) }}
|
||||
{%- endif %}
|
||||
{# ---- Case A: If the first message is "system", handle it here alone (without continue) ---- #}
|
||||
{%- if i == 0 and role == 'system' %}
|
||||
{{- role_indicators['system'] }}
|
||||
{{- msg.content }}
|
||||
{%- if tools is defined and tools %}
|
||||
{{- "\n\n" }}{{- available_tools(tools) }}
|
||||
{%- endif %}
|
||||
{{- end_of_turn -}}
|
||||
{%- else %}
|
||||
{# ---- Case B: If the first message is tools instead of system, inject the system tools preamble ---- #}
|
||||
{%- if i == 0 and tools is defined and tools %}
|
||||
{{- role_indicators['system'] }}
|
||||
{{- available_tools(tools) }}
|
||||
{{- end_of_turn -}}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if role == 'assistant' %}
|
||||
{{- role_indicators['assistant'] }}
|
||||
{%- if msg.content %}
|
||||
{%- if "</think>" in msg.content %}
|
||||
{%- set content = msg.content.split('</think>')[-1].strip() %}
|
||||
{%- set reasoning_content = msg.content.split('</think>')[0].strip() %}
|
||||
{%- if reasoning_content.startswith("<think>") %}
|
||||
{%- set reasoning_content = reasoning_content[7:].strip() %}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{%- set content = msg.content %}
|
||||
{%- endif %}
|
||||
{%- if msg.reasoning_content %}
|
||||
{%- set reasoning_content = msg.reasoning_content %}
|
||||
{%- endif %}
|
||||
{%- if (not skip_think and loop.last) and reasoning_content is defined %}
|
||||
{{- "<think>\n" }}
|
||||
{{- reasoning_content}}
|
||||
{{- "\n</think>\n\n" }}
|
||||
{%- else %}
|
||||
{{- "<think>\n\n</think>\n\n" }}
|
||||
{%- endif %}
|
||||
{{- content }}
|
||||
{%- endif %}
|
||||
{%- if msg.tool_calls %}
|
||||
{%- if msg.content %}
|
||||
{{- "\n" }}
|
||||
{%- else %}
|
||||
{{- "<think>\n\n</think>\n\n" }}
|
||||
{%- endif %}
|
||||
{%- for tool_call in msg.tool_calls %}
|
||||
{%- if tool_call.function is defined %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{%- if tool_call.arguments is defined %}
|
||||
{%- set arguments = tool_call.arguments %}
|
||||
{%- elif tool_call.parameters is defined %}
|
||||
{%- set arguments = tool_call.parameters %}
|
||||
{%- else %}
|
||||
{{- raise_exception('arguments or parameters are mandatory: ' ~ tool_call) }}
|
||||
{%- endif %}
|
||||
{{- "<tool_call>" }}{"name": "{{- tool_call.name }}", "arguments": {{ arguments | tojson(ensure_ascii=False) | safe }}}{{- "</tool_call>" }}
|
||||
{%- if not loop.last %}
|
||||
{{- "\n" }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- end_of_turn -}}
|
||||
{%- elif role == "tool" %}
|
||||
{%- if i == 0 or messages[i - 1].role != "tool" %}
|
||||
{{- role_indicators['tool'] }}
|
||||
{%- endif %}
|
||||
{%- if msg.content is defined %}
|
||||
{{- "<tool_result>" }}{"result": {{ msg.content | tojson(ensure_ascii=False) | safe }}}{{- "</tool_result>" }}
|
||||
{%- endif %}
|
||||
{%- if loop.last or messages[i + 1].role != "tool" %}
|
||||
{{- end_of_turn -}}
|
||||
{%- else %}
|
||||
{{- "\n" }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- role_indicators[role] }}
|
||||
{{- msg.content }}
|
||||
{{- end_of_turn -}}
|
||||
{%- endif %}
|
||||
{% endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- role_indicators['assistant'] }}
|
||||
{%- if enable_thinking is defined and enable_thinking is true %}
|
||||
{{- "<think>\n" }}
|
||||
{%- else %}
|
||||
{{- "<think>\n\n</think>\n\n" }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
@@ -58,6 +58,7 @@ class ChatTemplate(str, Enum):
|
||||
falcon_h1 = "falcon_h1"
|
||||
tokenizer_default = "tokenizer_default"
|
||||
exaone = "exaone"
|
||||
exaone4 = "exaone4"
|
||||
metharme = "metharme"
|
||||
pixtral = "pixtral"
|
||||
llava = "llava"
|
||||
|
||||
@@ -100,6 +100,15 @@ class LoraConfig(BaseModel):
|
||||
)
|
||||
},
|
||||
)
|
||||
peft_ensure_weight_tying: bool | None = Field(
|
||||
default=None,
|
||||
json_schema_extra={
|
||||
"description": (
|
||||
"Whether to tie adapter weights for tied model weights. "
|
||||
"See https://github.com/huggingface/peft/issues/2864"
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
qlora_sharded_model_loading: bool | None = Field(
|
||||
default=False,
|
||||
|
||||
Reference in New Issue
Block a user