From 2974670bf849a7f51534a5f646d717ad609ef9b2 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 8 Aug 2025 19:09:11 +0700 Subject: [PATCH] Feat: add arcee (#3028) * feat: add arcee * feat: add latest models supported by cce * feat: add arcee example config * chore: lint * fix: typo * feat: change to instruct * feat: add vram usage * Update README.md --- examples/arcee/README.md | 53 +++++++++++++++ examples/arcee/afm-4.5b-qlora.yaml | 64 +++++++++++++++++++ .../colab-axolotl-example.ipynb | 2 +- .../magistral/magistral-small-fsdp-qlora.yaml | 1 - examples/magistral/magistral-small-qlora.yaml | 1 - .../magistral-small-think-qlora.yaml | 1 - scripts/cutcrossentropy_install.py | 2 +- .../integrations/cut_cross_entropy/README.md | 7 +- .../cut_cross_entropy/__init__.py | 2 +- src/axolotl/monkeypatch/multipack.py | 1 + 10 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 examples/arcee/README.md create mode 100644 examples/arcee/afm-4.5b-qlora.yaml diff --git a/examples/arcee/README.md b/examples/arcee/README.md new file mode 100644 index 000000000..217893306 --- /dev/null +++ b/examples/arcee/README.md @@ -0,0 +1,53 @@ +# Finetune ArceeAI's AFM with Axolotl + +[Arcee Foundation Models (AFM)](https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473) are a family of 4.5B parameter open weight models trained by Arcee.ai. + +This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking. + +Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the AFM model. + +## Getting started + +1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as AFM is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html). 
+ + Here is an example of how to install from main for pip: + +```bash +# Ensure you have PyTorch installed (PyTorch 2.6.0 min) +git clone https://github.com/axolotl-ai-cloud/axolotl.git +cd axolotl + +pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install --no-build-isolation -e '.[flash-attn]' +``` + +2. Run the finetuning example: + +```bash +axolotl train examples/arcee/afm-4.5b-qlora.yaml +``` + +This config uses about 7.8GiB VRAM. + +Let us know how it goes. Happy finetuning! 🚀 + +### TIPS + +- For inference, the official Arcee.ai team recommends `top_p: 0.95`, `temperature: 0.5`, `top_k: 50`, and `repeat_penalty: 1.1`. +- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html). +- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template). 
+ +## Optimization Guides + +- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html) +- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html) +- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html) + +## Related Resources + +- [AFM Blog](https://docs.arcee.ai/arcee-foundation-models/introduction-to-arcee-foundation-models) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml new file mode 100644 index 000000000..2cb42cacd --- /dev/null +++ b/examples/arcee/afm-4.5b-qlora.yaml @@ -0,0 +1,64 @@ +base_model: arcee-ai/AFM-4.5B + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config diff --git 
a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index c283092be..d79c2fb09 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -40,7 +40,7 @@ "%%capture\n", "# This step can take ~5-10 minutes to install dependencies\n", "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n", - "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169\"" + "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8\"" ] }, { diff --git a/examples/magistral/magistral-small-fsdp-qlora.yaml b/examples/magistral/magistral-small-fsdp-qlora.yaml index 14a7ee219..d46c49fe0 100644 --- a/examples/magistral/magistral-small-fsdp-qlora.yaml +++ b/examples/magistral/magistral-small-fsdp-qlora.yaml @@ -27,7 +27,6 @@ sequence_len: 2048 sample_packing: true eval_sample_packing: false - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-qlora.yaml b/examples/magistral/magistral-small-qlora.yaml index 5ec2f0fbf..188924d39 100644 --- a/examples/magistral/magistral-small-qlora.yaml +++ b/examples/magistral/magistral-small-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-think-qlora.yaml b/examples/magistral/magistral-small-think-qlora.yaml index 0e8a9c1f7..b715b3156 100644 --- a/examples/magistral/magistral-small-think-qlora.yaml +++ b/examples/magistral/magistral-small-think-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py index cf9ced60c..195aac2e2 100644 --- a/scripts/cutcrossentropy_install.py +++ 
b/scripts/cutcrossentropy_install.py @@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else "" print( UNINSTALL_PREFIX - + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"' + + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"' ) diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md index e0ff14db8..7924d3472 100644 --- a/src/axolotl/integrations/cut_cross_entropy/README.md +++ b/src/axolotl/integrations/cut_cross_entropy/README.md @@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh - If you are installing from pip ```bash -pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169" +pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8" ``` ## Usage @@ -31,6 +31,7 @@ plugins: ## Supported Models +- arcee - cohere - cohere2 - gemma @@ -41,13 +42,17 @@ plugins: - gemma3n_text - glm - glm4 +- gpt_oss - granite - granitemoe +- hunyuan_v1_dense +- hunyuan_v1_moe - llama - llama4 - llama4_text - mistral - mistral3 +- mixtral - mllama - phi - phi3 diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py index 24cd7b6a7..6f529f10e 100644 --- a/src/axolotl/integrations/cut_cross_entropy/__init__.py +++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py @@ -34,7 +34,7 @@ LOG = get_logger(__name__) _CCE_INSTALL_MESSAGE = ( "Please install Axolotl's fork of cut_cross_entropy with transformers support using " - '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"`' + '`pip install 
"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"`' ) diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index 5fc5ae856..7df9877d7 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -37,6 +37,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "glm4", "smollm3", "gpt_oss", + "arcee", ]