diff --git a/examples/trinity/README.md b/examples/trinity/README.md new file mode 100644 index 000000000..28b2e2b52 --- /dev/null +++ b/examples/trinity/README.md @@ -0,0 +1,38 @@ +# Finetune ArceeAI's Trinity with Axolotl + +[Trinity](https://huggingface.co/collections/arcee-ai/trinity) is a family of open weight MoE models trained by Arcee.ai. + +This guide shows how to fine-tune it with Axolotl using multi-turn conversations and proper masking. + +## Getting started + +1. Install Axolotl following the instructions in the [installation guide](https://docs.axolotl.ai/docs/installation.html#sec-edge-build). + +2. Run the finetuning example: + + ```bash + axolotl train examples/trinity/trinity-nano-preview-qlora.yaml + ``` + +This config uses about 24.9 GiB VRAM. + +Let us know how it goes. Happy finetuning! 🚀 + +### TIPS + +- For inference, the official Arcee.ai team recommends `top_p: 0.75`, `temperature: 0.15`, `top_k: 50`, and `min_p: 0.06`. +- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html). +- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template). + +## Optimization Guides + +Please check the [Optimizations doc](https://docs.axolotl.ai/docs/optimizations.html). 
+ +## Related Resources + +- [Trinity Blog](https://www.arcee.ai/blog/the-trinity-manifesto) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/trinity/trinity-nano-preview-qlora.yaml b/examples/trinity/trinity-nano-preview-qlora.yaml new file mode 100644 index 000000000..43263cabd --- /dev/null +++ b/examples/trinity/trinity-nano-preview-qlora.yaml @@ -0,0 +1,67 @@ +base_model: arcee-ai/Trinity-Nano-Preview +trust_remote_code: true + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +# CCE - N/A as of now +# plugins: +# - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +# flash_attention: true # Not supported +sdp_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config diff --git a/src/axolotl/common/architectures.py b/src/axolotl/common/architectures.py index c8a2f0836..f4d6ca928 100644 --- 
a/src/axolotl/common/architectures.py +++ b/src/axolotl/common/architectures.py @@ -17,4 +17,5 @@ MOE_ARCH_BLOCK = { "deepseek_v3": "DeepseekV3MoE", "gpt_oss": "GptOssDecoderLayer", "lfm2_moe": "Lfm2MoeSparseMoeBlock", + "afmoe": "AfmoeMoE", } diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index fdda3c3bc..9642b1edb 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -52,6 +52,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "olmo", "olmo2", "olmo3", + "afmoe", ]