diff --git a/examples/LiquidAI/README.md b/examples/LiquidAI/README.md index 96fc74a92..8a18d9eb1 100644 --- a/examples/LiquidAI/README.md +++ b/examples/LiquidAI/README.md @@ -6,6 +6,8 @@ LFM2 features a new hybrid Liquid architecture with multiplicative gates, short- This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl. +Thanks to the team at LiquidAI for giving us early access to prepare for these releases. + ## Getting Started 1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). @@ -31,6 +33,14 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl. axolotl train examples/LiquidAI/lfm2-vl-lora.yaml ``` + **LFM2-MoE** + ```bash + pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6 + + # LoRA SFT (1x48GB @ 16.2GiB) + axolotl train examples/LiquidAI/lfm2-8b-a1b-lora.yaml + ``` + ### TIPS - **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it: @@ -45,14 +55,13 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl. ## Optimization Guides -- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html) -- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html) -- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html) +- [Optimizations Guide](https://docs.axolotl.ai/docs/optimizations.html) ## Related Resources - [LFM2 Blog](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models) - [LFM2-VL Blog](https://www.liquid.ai/blog/lfm2-vl-efficient-vision-language-models) +- [LFM2-MoE Blog](https://www.liquid.ai/blog/lfm2-8b-a1b-an-efficient-on-device-mixture-of-experts) - [Axolotl Docs](https://docs.axolotl.ai) - [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) - [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/LiquidAI/lfm2-350m-fft.yaml b/examples/LiquidAI/lfm2-350m-fft.yaml index d19815008..145b56dd1 100644 --- a/examples/LiquidAI/lfm2-350m-fft.yaml +++ b/examples/LiquidAI/lfm2-350m-fft.yaml @@ -1,6 +1,7 @@ base_model: LiquidAI/LFM2-350M -chunked_cross_entropy: true +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin eot_tokens: - "<|im_end|>" diff --git a/examples/LiquidAI/lfm2-8b-a1b-lora.yaml b/examples/LiquidAI/lfm2-8b-a1b-lora.yaml new file mode 100644 index 000000000..73cbfcce7 --- /dev/null +++ b/examples/LiquidAI/lfm2-8b-a1b-lora.yaml @@ -0,0 +1,59 @@ +base_model: LiquidAI/LFM2-8B-A1B + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: true + +eot_tokens: + - "<|im_end|>" +datasets: + - path: mlabonne/FineTome-100k + type: chat_template + split: train[:20%] + field_messages: conversations + message_field_role: from + message_field_content: value +dataset_prepared_path: last_run_prepared +val_set_size: 0.05 +output_dir: ./outputs/out + +sequence_len: 4096 +sample_packing: true + +adapter: lora +lora_model_dir: + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj' + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 2 +micro_batch_size: 4 +num_epochs: 1 +optimizer: adamw_torch_fused +lr_scheduler: cosine +learning_rate: 5e-5 + +bf16: true +tf32: true + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 2 +saves_per_epoch: 1 + +weight_decay: 0.0 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config diff --git a/examples/LiquidAI/lfm2-vl-lora.yaml b/examples/LiquidAI/lfm2-vl-lora.yaml index 7fee17f92..313da8274 100644 --- a/examples/LiquidAI/lfm2-vl-lora.yaml +++ b/examples/LiquidAI/lfm2-vl-lora.yaml @@ -3,6 +3,9 @@ trust_remote_code: true model_type: AutoModelForImageTextToText processor_type: AutoProcessor +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + # these 3 lines are needed for now to handle vision chat templates w images skip_prepare_dataset: true remove_unused_columns: false diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index 9e18757f6..ee99c283f 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -40,7 +40,7 @@ "%%capture\n", "# This step can take ~5-10 minutes to install dependencies\n", "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n", - "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\"" + "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308\"" ] }, { diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py index 32f585858..cf8bd57e7 100644 --- a/scripts/cutcrossentropy_install.py +++ b/scripts/cutcrossentropy_install.py @@ -29,5 +29,5 @@ UV_PREFIX = "uv " if USE_UV else "" print( UNINSTALL_PREFIX - + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"' + + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308"' ) diff --git a/src/axolotl/common/architectures.py b/src/axolotl/common/architectures.py index ce945e670..b754e56ba 100644 --- a/src/axolotl/common/architectures.py +++ b/src/axolotl/common/architectures.py @@ -14,4 +14,5 @@ MOE_ARCH_BLOCK = { "qwen3_moe": "Qwen3MoeSparseMoeBlock", "deepseek_v2": "DeepseekV2MoE", "gpt_oss": "GptOssDecoderLayer", + "lfm2_moe": "Lfm2MoeSparseMoeBlock", } diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md index c33d45f00..08cd41200 100644 --- a/src/axolotl/integrations/cut_cross_entropy/README.md +++ b/src/axolotl/integrations/cut_cross_entropy/README.md @@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh - If you are installing from pip ```bash -pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28" +pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308" ``` ## Usage @@ -54,9 +54,13 @@ plugins: - granitemoehybrid - hunyuan_v1_dense - hunyuan_v1_moe +- lfm2 +- lfm2_moe +- lfm2_vl - llama - llama4 - llama4_text +- llava - mistral - mistral3 - mixtral diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py index e8c6c23a3..ed6ebe62a 100644 --- a/src/axolotl/integrations/cut_cross_entropy/__init__.py +++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py @@ -35,7 +35,7 @@ LOG = get_logger(__name__) _CCE_INSTALL_MESSAGE = ( "Please install Axolotl's fork of cut_cross_entropy with transformers support using " - '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`' + '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308"`' ) diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index 4741245e1..48b4ea10e 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -45,6 +45,8 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "gpt_oss", "arcee", "seed_oss", + "lfm2", + "lfm2_moe", ]