From 2974670bf849a7f51534a5f646d717ad609ef9b2 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 8 Aug 2025 19:09:11 +0700 Subject: [PATCH] Feat: add arcee (#3028) * feat: add arcee * feat: add latest models supported by cce * feat: add arcee example config * chore: lint * fix: typo * feat: change to instruct * feat: add vram usage * Update README.md --- examples/arcee/README.md | 53 +++++++++++++++ examples/arcee/afm-4.5b-qlora.yaml | 64 +++++++++++++++++++ .../colab-axolotl-example.ipynb | 2 +- .../magistral/magistral-small-fsdp-qlora.yaml | 1 - examples/magistral/magistral-small-qlora.yaml | 1 - .../magistral-small-think-qlora.yaml | 1 - scripts/cutcrossentropy_install.py | 2 +- .../integrations/cut_cross_entropy/README.md | 7 +- .../cut_cross_entropy/__init__.py | 2 +- src/axolotl/monkeypatch/multipack.py | 1 + 10 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 examples/arcee/README.md create mode 100644 examples/arcee/afm-4.5b-qlora.yaml diff --git a/examples/arcee/README.md b/examples/arcee/README.md new file mode 100644 index 000000000..217893306 --- /dev/null +++ b/examples/arcee/README.md @@ -0,0 +1,53 @@ +# Finetune ArceeAI's AFM with Axolotl + +[Arcee Foundation Models (AFM)](https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473) are a family of 4.5B parameter open weight models trained by Arcee.ai. + +This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking. + +Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the AFM model. + +## Getting started + +1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as AFM is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html). 
+ + Here is an example of how to install from main for pip: + +```bash +# Ensure you have PyTorch installed (PyTorch 2.6.0 min) +git clone https://github.com/axolotl-ai-cloud/axolotl.git +cd axolotl + +pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install --no-build-isolation -e '.[flash-attn]' +``` + +2. Run the finetuning example: + +```bash +axolotl train examples/arcee/afm-4.5b-qlora.yaml +``` + +This config uses about 7.8GiB VRAM. + +Let us know how it goes. Happy finetuning! 🚀 + +### TIPS + +- For inference, the official Arcee.ai team recommends `top_p: 0.95`, `temperature: 0.5`, `top_k: 50`, and `repeat_penalty: 1.1`. +- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html). +- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template). 
+ +## Optimization Guides + +- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html) +- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html) +- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html) + +## Related Resources + +- [AFM Blog](https://docs.arcee.ai/arcee-foundation-models/introduction-to-arcee-foundation-models) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml new file mode 100644 index 000000000..2cb42cacd --- /dev/null +++ b/examples/arcee/afm-4.5b-qlora.yaml @@ -0,0 +1,64 @@ +base_model: arcee-ai/AFM-4.5B + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config diff --git 
a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index c283092be..d79c2fb09 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -40,7 +40,7 @@ "%%capture\n", "# This step can take ~5-10 minutes to install dependencies\n", "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n", - "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169\"" + "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8\"" ] }, { diff --git a/examples/magistral/magistral-small-fsdp-qlora.yaml b/examples/magistral/magistral-small-fsdp-qlora.yaml index 14a7ee219..d46c49fe0 100644 --- a/examples/magistral/magistral-small-fsdp-qlora.yaml +++ b/examples/magistral/magistral-small-fsdp-qlora.yaml @@ -27,7 +27,6 @@ sequence_len: 2048 sample_packing: true eval_sample_packing: false - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-qlora.yaml b/examples/magistral/magistral-small-qlora.yaml index 5ec2f0fbf..188924d39 100644 --- a/examples/magistral/magistral-small-qlora.yaml +++ b/examples/magistral/magistral-small-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-think-qlora.yaml b/examples/magistral/magistral-small-think-qlora.yaml index 0e8a9c1f7..b715b3156 100644 --- a/examples/magistral/magistral-small-think-qlora.yaml +++ b/examples/magistral/magistral-small-think-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py index cf9ced60c..195aac2e2 100644 --- a/scripts/cutcrossentropy_install.py +++ 
b/scripts/cutcrossentropy_install.py @@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else "" print( UNINSTALL_PREFIX - + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"' + + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"' ) diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md index e0ff14db8..7924d3472 100644 --- a/src/axolotl/integrations/cut_cross_entropy/README.md +++ b/src/axolotl/integrations/cut_cross_entropy/README.md @@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh - If you are installing from pip ```bash -pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169" +pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8" ``` ## Usage @@ -31,6 +31,7 @@ plugins: ## Supported Models +- arcee - cohere - cohere2 - gemma @@ -41,13 +42,17 @@ plugins: - gemma3n_text - glm - glm4 +- gpt_oss - granite - granitemoe +- hunyuan_v1_dense +- hunyuan_v1_moe - llama - llama4 - llama4_text - mistral - mistral3 +- mixtral - mllama - phi - phi3 diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py index 24cd7b6a7..6f529f10e 100644 --- a/src/axolotl/integrations/cut_cross_entropy/__init__.py +++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py @@ -34,7 +34,7 @@ LOG = get_logger(__name__) _CCE_INSTALL_MESSAGE = ( "Please install Axolotl's fork of cut_cross_entropy with transformers support using " - '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"`' + '`pip install 
"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"`' ) diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index 5fc5ae856..7df9877d7 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -37,6 +37,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "glm4", "smollm3", "gpt_oss", + "arcee", ]