From 138e8ed7f58e1bb6e583e827fd4bc2331ecbd8fc Mon Sep 17 00:00:00 2001
From: Quarto GHA Workflow Runner
Date: Tue, 17 Mar 2026 02:47:04 +0000
Subject: [PATCH] Built site for gh-pages

---
 .nojekyll                                     |   2 +-
 docs/custom_integrations.html                 |   4 +-
 docs/multimodal.html                          | 110 ++--
 .../colab-axolotl-example.html                |   2 +-
 index.html                                    |   2 +-
 search.json                                   |  10 +-
 sitemap.xml                                   | 474 +++++++++---------
 7 files changed, 306 insertions(+), 298 deletions(-)

diff --git a/.nojekyll b/.nojekyll
index 09ce6e542..73268f02f 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-756ab801
\ No newline at end of file
+3ace7c03
\ No newline at end of file

diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html
index 45fb54ec6..e8c9dac3e 100644
--- a/docs/custom_integrations.html
+++ b/docs/custom_integrations.html
@@ -963,7 +963,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
-pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@e8ad129"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe"

Usage
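The Usage section referenced by this context line amounts to a one-line plugin entry; a minimal sketch of an Axolotl config enabling CCE (the plugins value is taken verbatim from the Usage text quoted in search.json further down; everything else in a real training config is unchanged by this feature):

# Minimal sketch: enabling the Cut Cross Entropy integration.
# The plugins entry below comes from the docs' Usage section.
plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin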

@@ -1015,8 +1015,10 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
ministral3
mistral
mistral3
+mistral4
mixtral
mllama
+nemotron_h
olmo
olmo2
olmo3

diff --git a/docs/multimodal.html b/docs/multimodal.html
index 21a87e37a..0e8b6cadd 100644
--- a/docs/multimodal.html
+++ b/docs/multimodal.html
@@ -762,6 +762,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
Pixtral
Llava-1.5
Mistral-Small-3.1
+Mistral-Small-4
Magistral-Small-2509
Voxtral
Gemma-3

@@ -815,6 +816,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
Pixtral
Llava-1.5
Mistral-Small-3.1
+Mistral-Small-4
Magistral-Small-2509
Voxtral
Gemma-3

@@ -922,6 +924,10 @@ Tip
base_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503
+
+Mistral-Small-4
+
+base_model: mistralai/Mistral-Small-4-119B-2603

Magistral-Small-2509

@@ -937,7 +943,7 @@ Tip

Please make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'

-base_model: mistralai/Magistral-Small-2509
+base_model: mistralai/Magistral-Small-2509
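The Mistral-Small-4 entry added by the hunk above pins only the base model; a fuller sketch, assuming the shared multimodal hyperparams from this page's Usage section apply to it as they do to the other Mistral entries (the dataset block reuses the example dataset from that section; nothing below besides base_model is specific to Mistral-Small-4):

# Sketch: Mistral-Small-4 with the standard multimodal settings
# documented in the Usage section of docs/multimodal.html.
base_model: mistralai/Mistral-Small-4-119B-2603

processor_type: AutoProcessor
skip_prepare_dataset: true
remove_unused_columns: false  # columns are needed for image embeddings during training
sample_packing: false         # not yet supported with multimodal

datasets:
  - path: HuggingFaceH4/llava-instruct-mix-vsft  # example dataset from the Usage section
    type: chat_template
    split: train[:1%]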

Voxtral

@@ -954,9 +960,9 @@ Tip

Please make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'

-base_model: mistralai/Voxtral-Mini-3B-2507
-
-processor_type: VoxtralProcessor
+base_model: mistralai/Voxtral-Mini-3B-2507
+
+processor_type: VoxtralProcessor
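Voxtral is the only model on this page that swaps out the default processor; a combined sketch of the two keys this hunk re-renders, with the audio prerequisite noted (both keys appear verbatim in the diff above):

# Sketch: Voxtral overrides the AutoProcessor default used elsewhere on this page.
# Prerequisite from the Tip above: pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'
base_model: mistralai/Voxtral-Mini-3B-2507
processor_type: VoxtralProcessor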

Gemma-3

@@ -974,9 +980,9 @@ Tip

For multi-modal 4B/12B/27B models, use the following config:

-base_model: google/gemma-3-4b-it
-
-chat_template: gemma3
+base_model: google/gemma-3-4b-it
+
+chat_template: gemma3

Gemma-3n

@@ -1006,36 +1012,36 @@ Tip

Please make sure to install timm via pip3 install timm==1.0.17

-base_model: google/gemma-3n-E2B-it
-
-chat_template: gemma3n
+base_model: google/gemma-3n-E2B-it
+
+chat_template: gemma3n

Qwen2-VL

-base_model: Qwen/Qwen2-VL-7B-Instruct
-
-chat_template: qwen2_vl
+base_model: Qwen/Qwen2-VL-7B-Instruct
+
+chat_template: qwen2_vl

Qwen2.5-VL

-base_model: Qwen/Qwen2.5-VL-7B-Instruct
-
-chat_template: qwen2_vl  # same as qwen2-vl
+base_model: Qwen/Qwen2.5-VL-7B-Instruct
+
+chat_template: qwen2_vl  # same as qwen2-vl

Qwen3-VL

-base_model: Qwen/Qwen3-VL-4B-Instruct
-
-chat_template: qwen2_vl  # same as qwen2-vl
+base_model: Qwen/Qwen3-VL-4B-Instruct
+
+chat_template: qwen2_vl  # same as qwen2-vl

GLM-4.6V

Both GLM-4.6V (106B MoE) and GLM-4.6V-Flash (9B) are supported.

-# GLM-4.6V (106B MoE version)
-base_model: zai-org/GLM-4.6V
-
-# OR GLM-4.6V-Flash (9B version)
-base_model: zai-org/GLM-4.6V-Flash
+# GLM-4.6V (106B MoE version)
+base_model: zai-org/GLM-4.6V
+
+# OR GLM-4.6V-Flash (9B version)
+base_model: zai-org/GLM-4.6V-Flash

SmolVLM2

@@ -1052,7 +1058,7 @@ Tip

Please make sure to install num2words via pip3 install num2words==0.5.14

-base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
+base_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct

LFM2-VL

@@ -1069,7 +1075,7 @@ Warning

Please uninstall causal-conv1d via pip3 uninstall -y causal-conv1d

-base_model: LiquidAI/LFM2-VL-450M
+base_model: LiquidAI/LFM2-VL-450M

Intern-VL

@@ -1086,7 +1092,7 @@ Tip

Please make sure to install timm via pip3 install timm==1.0.19

-base_model: OpenGVLab/InternVL3_5-8B
+base_model: OpenGVLab/InternVL3_5-8B
@@ -1171,31 +1177,31 @@ Warning

Example

Here is an example of a multi-modal dataset:

-[
-  {
-    "messages": [
-        {
-            "role": "system",
-            "content": [
-              {"type": "text", "text": "You are a helpful assistant."}
-              ]
-        },
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
-                {"type": "text", "text": "Describe this image in detail."}
-            ]
-        },
-        {
-            "role": "assistant",
-            "content": [
-              {"type": "text", "text": "The image is a bee."}
-            ]
-        }
-    ]
-  }
-]
+[
+  {
+    "messages": [
+        {
+            "role": "system",
+            "content": [
+              {"type": "text", "text": "You are a helpful assistant."}
+              ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
+                {"type": "text", "text": "Describe this image in detail."}
+            ]
+        },
+        {
+            "role": "assistant",
+            "content": [
+              {"type": "text", "text": "The image is a bee."}
+            ]
+        }
+    ]
+  }
+]

diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html
index 92cfa3323..1eacd9278 100644
--- a/examples/colab-notebooks/colab-axolotl-example.html
+++ b/examples/colab-notebooks/colab-axolotl-example.html
@@ -810,7 +810,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
%%capture
     # This step can take ~5-10 minutes to install dependencies
     !pip install --no-build-isolation axolotl[flash-attn]>=0.9.1
    -!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@e8ad129"
    +!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe"

    Demo: Talk Like a Pirate

diff --git a/index.html b/index.html
index 52b106a0a..2090ffb38 100644
--- a/index.html
+++ b/index.html
@@ -809,7 +809,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
    • 2026/03:
    • 2026/02: diff --git a/search.json b/search.json index dc49843fe..d57266d48 100644 --- a/search.json +++ b/search.json @@ -3099,7 +3099,7 @@ "href": "index.html#latest-updates", "title": "Axolotl", "section": "🎉 Latest Updates", - "text": "🎉 Latest Updates\n\n2026/03:\n\nNew model support has been added in Axolotl for Qwen3.5, Qwen3.5 MoE, GLM-4.7-Flash, GLM-4.6V, and GLM-4.5-Air.\nMoE expert quantization support (via quantize_moe_experts: true) greatly reduces VRAM when training MoE models (FSDP2 compat).\n\n2026/02:\n\nScatterMoE LoRA support. LoRA fine-tuning directly on MoE expert weights using custom Triton kernels.\nAxolotl now has support for SageAttention and GDPO (Generalized DPO).\n\n2026/01:\n\nNew integration for EAFT (Entropy-Aware Focal Training), weights loss by entropy of the top-k logit distribution, and Scalable Softmax, improves long context in attention.\n\n2025/12:\n\nAxolotl now includes support for Kimi-Linear, Plano-Orchestrator, MiMo, InternVL 3.5, Olmo3, Trinity, and Ministral3.\nDistributed Muon Optimizer support has been added for FSDP2 pretraining.\n\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n\n\n\nExpand older updates\n\n\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See docs to start training your own Magistral models with Axolotl!\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n2025/04: Llama 4 support has been added in Axolotl. See docs to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. 
See docs.",
+    "text": "🎉 Latest Updates\n\n2026/03:\n\nNew model support has been added in Axolotl for Qwen3.5, Qwen3.5 MoE, GLM-4.7-Flash, GLM-4.6V, and GLM-4.5-Air.\nMoE expert quantization support (via quantize_moe_experts: true) greatly reduces VRAM when training MoE models (FSDP2 compat).\n\n2026/02:\n\nScatterMoE LoRA support. LoRA fine-tuning directly on MoE expert weights using custom Triton kernels.\nAxolotl now has support for SageAttention and GDPO (Generalized DPO).\n\n2026/01:\n\nNew integration for EAFT (Entropy-Aware Focal Training), weights loss by entropy of the top-k logit distribution, and Scalable Softmax, improves long context in attention.\n\n2025/12:\n\nAxolotl now includes support for Kimi-Linear, Plano-Orchestrator, MiMo, InternVL 3.5, Olmo3, Trinity, and Ministral3.\nDistributed Muon Optimizer support has been added for FSDP2 pretraining.\n\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n\n\n\nExpand older updates\n\n\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See docs to start training your own Magistral models with Axolotl!\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n2025/04: Llama 4 support has been added in Axolotl. See docs to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. 
See docs.", "crumbs": [ "Home" ] @@ -3514,7 +3514,7 @@ "href": "docs/custom_integrations.html#cut-cross-entropy", "title": "Custom Integrations", "section": "Cut Cross Entropy", - "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@e8ad129\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\nafmoe\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\nexaone4\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4_moe_lite\nglm46v\nglm4v\nglm4v_moe\nglm_image\nglm_moe_dsa\ngpt_oss\ngranite\ngranitemoe\ngranitemoehybrid\ngranitemoeshared\nhunyuan_v1_dense\nhunyuan_v1_moe\ninternvl\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmixtral\nmllama\nolmo\nolmo2\nolmo3\nolmoe\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_5_vl\nqwen2_moe\nqwen2_vl\nqwen3\nqwen3_5\nqwen3_5_text\nqwen3_5_moe\nqwen3_5_moe_text\nqwen3_moe\nqwen3_next\nqwen3_vl\nqwen3_vl_moe\nseed_oss\nsmollm3\nstep3p5\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", + "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\nafmoe\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\nexaone4\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4_moe_lite\nglm46v\nglm4v\nglm4v_moe\nglm_image\nglm_moe_dsa\ngpt_oss\ngranite\ngranitemoe\ngranitemoehybrid\ngranitemoeshared\nhunyuan_v1_dense\nhunyuan_v1_moe\ninternvl\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmistral4\nmixtral\nmllama\nnemotron_h\nolmo\nolmo2\nolmo3\nolmoe\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_5_vl\nqwen2_moe\nqwen2_vl\nqwen3\nqwen3_5\nqwen3_5_text\nqwen3_5_moe\nqwen3_5_moe_text\nqwen3_moe\nqwen3_next\nqwen3_vl\nqwen3_vl_moe\nseed_oss\nsmollm3\nstep3p5\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun 
and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", "crumbs": [ "Advanced Features", "Custom Integrations" @@ -5163,7 +5163,7 @@ "href": "docs/multimodal.html", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nGLM-4.6V\nSmolVLM2\nLFM2-VL\nIntern-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMistral-Small-4\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nGLM-4.6V\nSmolVLM2\nLFM2-VL\nIntern-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -5174,7 +5174,7 @@ "href": "docs/multimodal.html#supported-models", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nGLM-4.6V\nSmolVLM2\nLFM2-VL\nIntern-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMistral-Small-4\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nGLM-4.6V\nSmolVLM2\nLFM2-VL\nIntern-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -5185,7 +5185,7 @@ "href": "docs/multimodal.html#usage", "title": "MultiModal / Vision Language Models (BETA)", "section": "Usage", - "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section if specified\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder for full configs.\n\n\n\n\n\n\nTip\n\n\n\nSome of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\n\n\n\n\n\nNote\n\n\n\nAs of now, we do not truncate nor drop samples based on sequence_len as each arch has different ways to process non-text tokens. 
We are looking for help on this.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\n\nMagistral-Small-2509\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Magistral-Small-2509\n\n\nVoxtral\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'\n\n\nbase_model: mistralai/Voxtral-Mini-3B-2507\n\nprocessor_type: VoxtralProcessor\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nQwen3-VL\nbase_model: Qwen/Qwen3-VL-4B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nGLM-4.6V\nBoth GLM-4.6V (106B MoE) and GLM-4.6V-Flash (9B) are supported.\n# GLM-4.6V (106B MoE version)\nbase_model: zai-org/GLM-4.6V\n\n# OR GLM-4.6V-Flash (9B version)\nbase_model: zai-org/GLM-4.6V-Flash\n\n\nSmolVLM2\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install num2words via pip3 install num2words==0.5.14\n\n\nbase_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct\n\n\nLFM2-VL\n\n\n\n\n\n\nWarning\n\n\n\nPlease uninstall causal-conv1d via pip3 uninstall -y causal-conv1d\n\n\nbase_model: LiquidAI/LFM2-VL-450M\n\n\nIntern-VL\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.19\n\n\nbase_model: OpenGVLab/InternVL3_5-8B", + "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section if specified\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder for full configs.\n\n\n\n\n\n\nTip\n\n\n\nSome of our chat_templates 
have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\n\n\n\n\n\nNote\n\n\n\nAs of now, we do not truncate nor drop samples based on sequence_len as each arch has different ways to process non-text tokens. We are looking for help on this.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\n\nMistral-Small-4\nbase_model: mistralai/Mistral-Small-4-119B-2603\n\n\nMagistral-Small-2509\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Magistral-Small-2509\n\n\nVoxtral\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'\n\n\nbase_model: mistralai/Voxtral-Mini-3B-2507\n\nprocessor_type: VoxtralProcessor\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nQwen3-VL\nbase_model: Qwen/Qwen3-VL-4B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nGLM-4.6V\nBoth GLM-4.6V (106B MoE) and GLM-4.6V-Flash (9B) are supported.\n# GLM-4.6V (106B MoE version)\nbase_model: zai-org/GLM-4.6V\n\n# OR GLM-4.6V-Flash (9B version)\nbase_model: zai-org/GLM-4.6V-Flash\n\n\nSmolVLM2\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install num2words via pip3 install num2words==0.5.14\n\n\nbase_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct\n\n\nLFM2-VL\n\n\n\n\n\n\nWarning\n\n\n\nPlease uninstall causal-conv1d via pip3 uninstall -y causal-conv1d\n\n\nbase_model: LiquidAI/LFM2-VL-450M\n\n\nIntern-VL\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.19\n\n\nbase_model: OpenGVLab/InternVL3_5-8B", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" diff --git a/sitemap.xml b/sitemap.xml index c66221807..8715af66b 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,950 +2,950 @@ https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2026-03-16T04:14:26.693Z + 2026-03-17T02:40:20.788Z https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2026-03-16T04:14:26.717Z + 2026-03-17T02:40:20.810Z https://docs.axolotl.ai/docs/inference.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/expert_quantization.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z 
https://docs.axolotl.ai/docs/installation.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/models/ministral3/think.html - 2026-03-16T04:18:58.114Z + 2026-03-17T02:44:28.684Z https://docs.axolotl.ai/docs/models/granite4.html - 2026-03-16T04:18:58.124Z + 2026-03-17T02:44:28.692Z https://docs.axolotl.ai/docs/models/seed-oss.html - 2026-03-16T04:18:58.123Z + 2026-03-17T02:44:28.692Z https://docs.axolotl.ai/docs/models/orpheus.html - 2026-03-16T04:18:58.125Z + 2026-03-17T02:44:28.694Z https://docs.axolotl.ai/docs/models/internvl3_5.html - 2026-03-16T04:18:58.111Z + 2026-03-17T02:44:28.682Z https://docs.axolotl.ai/docs/models/magistral/vision.html - 2026-03-16T04:18:58.117Z + 2026-03-17T02:44:28.686Z https://docs.axolotl.ai/docs/models/mimo.html - 2026-03-16T04:18:58.110Z + 2026-03-17T02:44:28.682Z https://docs.axolotl.ai/docs/models/gpt-oss.html - 2026-03-16T04:18:58.122Z + 2026-03-17T02:44:28.691Z https://docs.axolotl.ai/docs/models/qwen3-next.html - 2026-03-16T04:18:58.120Z + 2026-03-17T02:44:28.690Z https://docs.axolotl.ai/docs/models/llama-2.html - 2026-03-16T04:18:58.120Z + 2026-03-17T02:44:28.689Z https://docs.axolotl.ai/docs/models/kimi-linear.html - 2026-03-16T04:18:58.109Z + 2026-03-17T02:44:28.681Z https://docs.axolotl.ai/docs/models/smolvlm2.html - 2026-03-16T04:18:58.123Z + 2026-03-17T02:44:28.692Z https://docs.axolotl.ai/docs/models/olmo3.html - 2026-03-16T04:18:58.112Z + 2026-03-17T02:44:28.683Z https://docs.axolotl.ai/docs/models/jamba.html - 2026-03-16T04:18:58.125Z + 2026-03-17T02:44:28.694Z https://docs.axolotl.ai/docs/models/mistral-small.html - 2026-03-16T04:18:58.117Z + 2026-03-17T02:44:28.687Z https://docs.axolotl.ai/docs/models/devstral.html - 2026-03-16T04:18:58.118Z + 2026-03-17T02:44:28.688Z https://docs.axolotl.ai/docs/models/index.html - 2026-03-16T04:18:58.125Z + 2026-03-17T02:44:28.694Z https://docs.axolotl.ai/docs/lora_optims.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/cli.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/docker.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/attention.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2026-03-16T04:18:34.775Z + 2026-03-17T02:44:04.031Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2026-03-16T04:18:34.583Z + 2026-03-17T02:44:03.834Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2026-03-16T04:18:35.111Z + 2026-03-17T02:44:04.375Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2026-03-16T04:18:35.113Z + 2026-03-17T02:44:04.376Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2026-03-16T04:18:34.552Z + 2026-03-17T02:44:03.801Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2026-03-16T04:18:35.332Z + 2026-03-17T02:44:04.598Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2026-03-16T04:18:35.425Z + 2026-03-17T02:44:04.691Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2026-03-16T04:18:35.099Z + 2026-03-17T02:44:04.362Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2026-03-16T04:18:35.728Z + 2026-03-17T02:44:04.997Z 
https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2026-03-16T04:18:34.658Z + 2026-03-17T02:44:03.909Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2026-03-16T04:18:34.576Z + 2026-03-17T02:44:03.827Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2026-03-16T04:18:34.161Z + 2026-03-17T02:44:03.401Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2026-03-16T04:18:34.575Z + 2026-03-17T02:44:03.825Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2026-03-16T04:18:34.767Z + 2026-03-17T02:44:04.022Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2026-03-16T04:18:34.607Z + 2026-03-17T02:44:03.858Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2026-03-16T04:18:34.469Z + 2026-03-17T02:44:03.717Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2026-03-16T04:18:35.120Z + 2026-03-17T02:44:04.383Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2026-03-16T04:18:35.414Z + 2026-03-17T02:44:04.680Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2026-03-16T04:18:34.760Z + 2026-03-17T02:44:04.014Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2026-03-16T04:18:35.054Z + 2026-03-17T02:44:04.316Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2026-03-16T04:18:35.697Z + 2026-03-17T02:44:04.966Z https://docs.axolotl.ai/docs/api/logging_config.html - 2026-03-16T04:18:34.125Z + 2026-03-17T02:44:03.365Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2026-03-16T04:18:35.133Z + 2026-03-17T02:44:04.398Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2026-03-16T04:18:35.666Z + 2026-03-17T02:44:04.934Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2026-03-16T04:18:34.829Z + 2026-03-17T02:44:04.087Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2026-03-16T04:18:34.550Z + 2026-03-17T02:44:03.799Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2026-03-16T04:18:34.190Z + 2026-03-17T02:44:03.431Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2026-03-16T04:18:35.646Z + 2026-03-17T02:44:04.914Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2026-03-16T04:18:34.816Z + 2026-03-17T02:44:04.073Z https://docs.axolotl.ai/docs/api/train.html - 2026-03-16T04:18:34.023Z + 2026-03-17T02:44:03.260Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2026-03-16T04:18:35.306Z + 2026-03-17T02:44:04.572Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2026-03-16T04:18:34.139Z + 2026-03-17T02:44:03.378Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2026-03-16T04:18:34.144Z + 2026-03-17T02:44:03.385Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2026-03-16T04:18:35.700Z + 2026-03-17T02:44:04.969Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2026-03-16T04:18:35.382Z + 2026-03-17T02:44:04.649Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2026-03-16T04:18:35.043Z + 2026-03-17T02:44:04.305Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2026-03-16T04:18:35.467Z + 2026-03-17T02:44:04.734Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2026-03-16T04:18:34.789Z + 2026-03-17T02:44:04.046Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2026-03-16T04:18:35.799Z + 2026-03-17T02:44:05.069Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2026-03-16T04:18:34.391Z + 2026-03-17T02:44:03.636Z 
https://docs.axolotl.ai/docs/api/loaders.model.html - 2026-03-16T04:18:34.564Z + 2026-03-17T02:44:03.813Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2026-03-16T04:18:34.366Z + 2026-03-17T02:44:03.611Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2026-03-16T04:18:34.455Z + 2026-03-17T02:44:03.702Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2026-03-16T04:18:35.213Z + 2026-03-17T02:44:04.479Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2026-03-16T04:18:34.513Z + 2026-03-17T02:44:03.761Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2026-03-16T04:18:34.404Z + 2026-03-17T02:44:03.652Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2026-03-16T04:18:35.045Z + 2026-03-17T02:44:04.307Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2026-03-16T04:18:34.700Z + 2026-03-17T02:44:03.952Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2026-03-16T04:18:35.793Z + 2026-03-17T02:44:05.062Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2026-03-16T04:18:35.734Z + 2026-03-17T02:44:05.003Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2026-03-16T04:18:35.456Z + 2026-03-17T02:44:04.723Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2026-03-16T04:18:34.896Z + 2026-03-17T02:44:04.156Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2026-03-16T04:18:34.733Z + 2026-03-17T02:44:03.986Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2026-03-16T04:18:34.273Z + 2026-03-17T02:44:03.516Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2026-03-16T04:18:34.202Z + 2026-03-17T02:44:03.443Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2026-03-16T04:18:34.717Z + 2026-03-17T02:44:03.969Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2026-03-16T04:18:34.794Z + 2026-03-17T02:44:04.051Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2026-03-16T04:18:35.132Z + 2026-03-17T02:44:04.396Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2026-03-16T04:18:35.008Z + 2026-03-17T02:44:04.270Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2026-03-16T04:18:35.797Z + 2026-03-17T02:44:05.067Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2026-03-16T04:18:35.804Z + 2026-03-17T02:44:05.073Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2026-03-16T04:18:35.223Z + 2026-03-17T02:44:04.489Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2026-03-16T04:18:35.661Z + 2026-03-17T02:44:04.929Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2026-03-16T04:18:35.185Z + 2026-03-17T02:44:04.450Z https://docs.axolotl.ai/docs/api/utils.data.streaming.html - 2026-03-16T04:18:35.324Z + 2026-03-17T02:44:04.590Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2026-03-16T04:18:34.113Z + 2026-03-17T02:44:03.353Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2026-03-16T04:18:34.618Z + 2026-03-17T02:44:03.869Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2026-03-16T04:18:34.417Z + 2026-03-17T02:44:03.664Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2026-03-16T04:18:34.627Z + 2026-03-17T02:44:03.878Z https://docs.axolotl.ai/docs/api/convert.html - 2026-03-16T04:18:34.061Z + 2026-03-17T02:44:03.300Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2026-03-16T04:18:35.698Z + 
2026-03-17T02:44:04.967Z https://docs.axolotl.ai/docs/api/cli.args.html - 2026-03-16T04:18:34.298Z + 2026-03-17T02:44:03.541Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2026-03-16T04:18:34.195Z + 2026-03-17T02:44:03.436Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2026-03-16T04:18:34.901Z + 2026-03-17T02:44:04.160Z https://docs.axolotl.ai/docs/api/index.html - 2026-03-16T04:18:33.945Z + 2026-03-17T02:44:03.182Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/telemetry.html - 2026-03-16T04:14:26.688Z + 2026-03-17T02:40:20.784Z https://docs.axolotl.ai/docs/config-reference.html - 2026-03-16T04:18:57.037Z + 2026-03-17T02:44:27.450Z https://docs.axolotl.ai/docs/ray-integration.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/streaming.html - 2026-03-16T04:14:26.688Z + 2026-03-17T02:40:20.784Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2026-03-16T04:14:26.688Z + 2026-03-17T02:40:20.784Z https://docs.axolotl.ai/docs/unsloth.html - 2026-03-16T04:14:26.688Z + 2026-03-17T02:40:20.784Z https://docs.axolotl.ai/docs/mixed_precision.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/amd_hpc.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/lr_groups.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/optimizations.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/mac.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/index.html - 2026-03-16T04:14:26.711Z + 2026-03-17T02:40:20.803Z https://docs.axolotl.ai/docs/optimizers.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/getting-started.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/multi-node.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/input_output.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/dataset_loading.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/quantize.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/rlhf.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/custom_integrations.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/qat.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/checkpoint_saving.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2026-03-16T04:14:26.683Z + 
2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/api/cli.main.html - 2026-03-16T04:18:34.253Z + 2026-03-17T02:44:03.495Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2026-03-16T04:18:35.429Z + 2026-03-17T02:44:04.695Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2026-03-16T04:18:34.211Z + 2026-03-17T02:44:03.453Z https://docs.axolotl.ai/docs/api/common.const.html - 2026-03-16T04:18:35.678Z + 2026-03-17T02:44:04.947Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2026-03-16T04:18:34.447Z + 2026-03-17T02:44:03.694Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2026-03-16T04:18:34.605Z + 2026-03-17T02:44:03.856Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2026-03-16T04:18:35.356Z + 2026-03-17T02:44:04.623Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2026-03-16T04:18:35.109Z + 2026-03-17T02:44:04.373Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2026-03-16T04:18:34.846Z + 2026-03-17T02:44:04.104Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2026-03-16T04:18:34.397Z + 2026-03-17T02:44:03.643Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2026-03-16T04:18:34.744Z + 2026-03-17T02:44:03.997Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2026-03-16T04:18:35.670Z + 2026-03-17T02:44:04.938Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2026-03-16T04:18:35.152Z + 2026-03-17T02:44:04.416Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2026-03-16T04:18:35.280Z + 2026-03-17T02:44:04.546Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2026-03-16T04:18:35.021Z + 2026-03-17T02:44:04.283Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2026-03-16T04:18:35.124Z + 2026-03-17T02:44:04.388Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2026-03-16T04:18:34.802Z + 2026-03-17T02:44:04.059Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2026-03-16T04:18:35.676Z + 2026-03-17T02:44:04.945Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2026-03-16T04:18:34.419Z + 2026-03-17T02:44:03.666Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2026-03-16T04:18:35.064Z + 2026-03-17T02:44:04.326Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2026-03-16T04:18:35.052Z + 2026-03-17T02:44:04.314Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2026-03-16T04:18:34.870Z + 2026-03-17T02:44:04.128Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2026-03-16T04:18:34.310Z + 2026-03-17T02:44:03.554Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2026-03-16T04:18:34.506Z + 2026-03-17T02:44:03.754Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2026-03-16T04:18:35.193Z + 2026-03-17T02:44:04.459Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2026-03-16T04:18:34.440Z + 2026-03-17T02:44:03.687Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2026-03-16T04:18:34.133Z + 2026-03-17T02:44:03.373Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2026-03-16T04:18:35.055Z + 2026-03-17T02:44:04.318Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2026-03-16T04:18:35.246Z + 2026-03-17T02:44:04.511Z https://docs.axolotl.ai/docs/api/cli.train.html - 2026-03-16T04:18:34.263Z + 
2026-03-17T02:44:03.506Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2026-03-16T04:18:34.193Z + 2026-03-17T02:44:03.434Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2026-03-16T04:18:35.201Z + 2026-03-17T02:44:04.467Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2026-03-16T04:18:35.436Z + 2026-03-17T02:44:04.702Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2026-03-16T04:18:35.148Z + 2026-03-17T02:44:04.412Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2026-03-16T04:18:34.521Z + 2026-03-17T02:44:03.770Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2026-03-16T04:18:35.785Z + 2026-03-17T02:44:05.053Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2026-03-16T04:18:34.535Z + 2026-03-17T02:44:03.784Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2026-03-16T04:18:34.656Z + 2026-03-17T02:44:03.907Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2026-03-16T04:18:34.868Z + 2026-03-17T02:44:04.127Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2026-03-16T04:18:34.858Z + 2026-03-17T02:44:04.116Z https://docs.axolotl.ai/docs/api/datasets.html - 2026-03-16T04:18:34.044Z + 2026-03-17T02:44:03.283Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2026-03-16T04:18:35.652Z + 2026-03-17T02:44:04.919Z https://docs.axolotl.ai/docs/api/cli.art.html - 2026-03-16T04:18:34.302Z + 2026-03-17T02:44:03.546Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2026-03-16T04:18:35.817Z + 2026-03-17T02:44:05.087Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2026-03-16T04:18:35.062Z + 2026-03-17T02:44:04.325Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2026-03-16T04:18:34.807Z + 2026-03-17T02:44:04.065Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2026-03-16T04:18:34.847Z + 2026-03-17T02:44:04.106Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2026-03-16T04:18:34.192Z + 2026-03-17T02:44:03.433Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2026-03-16T04:18:34.434Z + 2026-03-17T02:44:03.681Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2026-03-16T04:18:34.487Z + 2026-03-17T02:44:03.735Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2026-03-16T04:18:35.391Z + 2026-03-17T02:44:04.657Z https://docs.axolotl.ai/docs/api/evaluate.html - 2026-03-16T04:18:34.036Z + 2026-03-17T02:44:03.275Z https://docs.axolotl.ai/docs/api/cli.config.html - 2026-03-16T04:18:34.332Z + 2026-03-17T02:44:03.576Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2026-03-16T04:18:35.674Z + 2026-03-17T02:44:04.943Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2026-03-16T04:18:34.781Z + 2026-03-17T02:44:04.037Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2026-03-16T04:18:35.723Z + 2026-03-17T02:44:04.992Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2026-03-16T04:18:34.409Z + 2026-03-17T02:44:03.656Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2026-03-16T04:18:35.373Z + 2026-03-17T02:44:04.640Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2026-03-16T04:18:35.057Z + 2026-03-17T02:44:04.320Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2026-03-16T04:18:35.313Z + 2026-03-17T02:44:04.579Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2026-03-16T04:18:35.651Z 
+ 2026-03-17T02:44:04.919Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2026-03-16T04:18:34.338Z + 2026-03-17T02:44:03.582Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2026-03-16T04:18:34.380Z + 2026-03-17T02:44:03.626Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2026-03-16T04:18:35.209Z + 2026-03-17T02:44:04.474Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2026-03-16T04:18:35.146Z + 2026-03-17T02:44:04.410Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2026-03-16T04:18:35.474Z + 2026-03-17T02:44:04.741Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2026-03-16T04:18:35.807Z + 2026-03-17T02:44:05.078Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2026-03-16T04:18:35.322Z + 2026-03-17T02:44:04.588Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2026-03-16T04:18:35.033Z + 2026-03-17T02:44:04.295Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2026-03-16T04:18:34.614Z + 2026-03-17T02:44:03.865Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2026-03-16T04:18:34.355Z + 2026-03-17T02:44:03.600Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2026-03-16T04:18:34.842Z + 2026-03-17T02:44:04.099Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2026-03-16T04:18:35.194Z + 2026-03-17T02:44:04.461Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2026-03-16T04:18:34.719Z + 2026-03-17T02:44:03.971Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2026-03-16T04:18:34.844Z + 2026-03-17T02:44:04.101Z https://docs.axolotl.ai/docs/multipack.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/torchao.html - 2026-03-16T04:14:26.688Z + 2026-03-17T02:40:20.784Z https://docs.axolotl.ai/docs/reward_modelling.html - 2026-03-16T04:14:26.687Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/nccl.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/multi-gpu.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.781Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/multimodal.html - 2026-03-16T04:14:26.686Z + 2026-03-17T02:40:20.782Z https://docs.axolotl.ai/docs/models/LiquidAI.html - 2026-03-16T04:18:58.124Z + 2026-03-17T02:44:28.693Z https://docs.axolotl.ai/docs/models/mistral.html - 2026-03-16T04:18:58.119Z + 2026-03-17T02:44:28.689Z https://docs.axolotl.ai/docs/models/trinity.html - 2026-03-16T04:18:58.112Z + 2026-03-17T02:44:28.683Z https://docs.axolotl.ai/docs/models/hunyuan.html - 2026-03-16T04:18:58.125Z + 2026-03-17T02:44:28.693Z https://docs.axolotl.ai/docs/models/phi.html - 2026-03-16T04:18:58.123Z + 2026-03-17T02:44:28.692Z https://docs.axolotl.ai/docs/models/apertus.html - 2026-03-16T04:18:58.122Z + 2026-03-17T02:44:28.691Z https://docs.axolotl.ai/docs/models/plano.html - 2026-03-16T04:18:58.110Z + 2026-03-17T02:44:28.682Z https://docs.axolotl.ai/docs/models/gemma3n.html - 2026-03-16T04:18:58.121Z + 2026-03-17T02:44:28.690Z https://docs.axolotl.ai/docs/models/arcee.html - 2026-03-16T04:18:58.113Z + 2026-03-17T02:44:28.683Z https://docs.axolotl.ai/docs/models/ministral3.html - 2026-03-16T04:18:58.114Z + 2026-03-17T02:44:28.684Z https://docs.axolotl.ai/docs/models/magistral/think.html - 2026-03-16T04:18:58.116Z + 2026-03-17T02:44:28.686Z https://docs.axolotl.ai/docs/models/llama-4.html - 
2026-03-16T04:18:58.119Z + 2026-03-17T02:44:28.689Z https://docs.axolotl.ai/docs/models/voxtral.html - 2026-03-16T04:18:58.118Z + 2026-03-17T02:44:28.687Z https://docs.axolotl.ai/docs/models/magistral.html - 2026-03-16T04:18:58.116Z + 2026-03-17T02:44:28.686Z https://docs.axolotl.ai/docs/models/qwen3.html - 2026-03-16T04:18:58.121Z + 2026-03-17T02:44:28.690Z https://docs.axolotl.ai/docs/models/ministral.html - 2026-03-16T04:18:58.117Z + 2026-03-17T02:44:28.687Z https://docs.axolotl.ai/docs/models/ministral3/vision.html - 2026-03-16T04:18:58.115Z + 2026-03-17T02:44:28.685Z https://docs.axolotl.ai/docs/debugging.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/docs/faq.html - 2026-03-16T04:14:26.683Z + 2026-03-17T02:40:20.779Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2026-03-16T04:14:26.717Z + 2026-03-17T02:40:20.809Z https://docs.axolotl.ai/FAQS.html - 2026-03-16T04:14:26.680Z + 2026-03-17T02:40:20.777Z