From 60f227a9f0677a4bc6ea21fadfd0396034d42f94 Mon Sep 17 00:00:00 2001
From: Quarto GHA Workflow Runner
Date: Thu, 25 Dec 2025 11:15:25 +0000
Subject: [PATCH] Built site for gh-pages

---
 .nojekyll                      |   2 +-
 docs/custom_integrations.html  |   3 +-
 docs/multimodal.html           |  69 +--
 .../colab-axolotl-example.html |   2 +-
 index.html                     |   2 +-
 search.json                    |  10 +-
 sitemap.xml                    | 400 +++++++++---------
 7 files changed, 254 insertions(+), 234 deletions(-)

diff --git a/.nojekyll b/.nojekyll
index 0892b5282..f4c9b0725 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-7ef13619
\ No newline at end of file
+3bacccbc
\ No newline at end of file
diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html
index c4353a806..cc8dcc681 100644
--- a/docs/custom_integrations.html
+++ b/docs/custom_integrations.html
@@ -619,7 +619,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

-pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@242b245"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@318b7e2"

Usage
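With the package installed, the plugin is switched on from the training config rather than from code. Below is a minimal sketch of how this typically looks in an axolotl YAML config; the plugins entry is the one this Usage section documents, while the base_model value is only a placeholder for whatever supported model you are fine-tuning.

# Minimal sketch: enable Cut Cross Entropy in an otherwise unchanged axolotl config.
plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin

base_model: NousResearch/Meta-Llama-3-8B  # placeholder; use any architecture from the supported list below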

@@ -652,6 +652,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • granitemoehybrid
  • hunyuan_v1_dense
  • hunyuan_v1_moe
+• internvl
  • kimi_linear
  • lfm2
  • lfm2_moe
diff --git a/docs/multimodal.html b/docs/multimodal.html
index ba2edde87..fda392b59 100644
--- a/docs/multimodal.html
+++ b/docs/multimodal.html
@@ -528,6 +528,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • Qwen3-VL
  • SmolVLM2
  • LFM2-VL
+• Intern-VL
  • Dataset Format
@@ -815,6 +817,23 @@ Warning
    base_model: LiquidAI/LFM2-VL-450M
+
+Intern-VL
+
+Tip
+
+Please make sure to install timm via pip3 install timm==1.0.19
+
+base_model: OpenGVLab/InternVL3_5-8B
+
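Combining this with the shared multimodal settings from the Usage section above, a minimal Intern-VL run could be sketched roughly as follows; every key except base_model comes from the generic multimodal hyperparameters and example dataset listed earlier on this page, and the tiny split is only for smoke-testing.

base_model: OpenGVLab/InternVL3_5-8B
processor_type: AutoProcessor

skip_prepare_dataset: true
remove_unused_columns: false  # columns are needed to handle image embeddings during training
sample_packing: false         # not yet supported with multimodal

datasets:
  - path: HuggingFaceH4/llava-instruct-mix-vsft  # example dataset from the Usage section
    type: chat_template
    split: train[:1%]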

    Dataset Format

    @@ -898,31 +917,31 @@ Warning

    Example

    Here is an example of a multi-modal dataset:

-[
    -  {
    -    "messages": [
    -        {
    -            "role": "system",
    -            "content": [
    -              {"type": "text", "text": "You are a helpful assistant."}
    -              ]
    -        },
    -        {
    -            "role": "user",
    -            "content": [
    -                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
    -                {"type": "text", "text": "Describe this image in detail."}
    -            ]
    -        },
    -        {
    -            "role": "assistant",
    -            "content": [
    -              {"type": "text", "text": "The image is a bee."}
    -            ]
    -        }
    -    ]
    -  }
    -]
+[
    +  {
    +    "messages": [
    +        {
    +            "role": "system",
    +            "content": [
    +              {"type": "text", "text": "You are a helpful assistant."}
    +              ]
    +        },
    +        {
    +            "role": "user",
    +            "content": [
    +                {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
    +                {"type": "text", "text": "Describe this image in detail."}
    +            ]
    +        },
    +        {
    +            "role": "assistant",
    +            "content": [
    +              {"type": "text", "text": "The image is a bee."}
    +            ]
    +        }
    +    ]
    +  }
    +]
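To train on records shaped like the example above, the datasets block from the Usage section can point at them directly. A minimal sketch, assuming a local JSON/JSONL file is acceptable as the dataset path (the ./multimodal-train.jsonl filename is hypothetical):

datasets:
  - path: ./multimodal-train.jsonl  # hypothetical local file of records like the example above
    type: chat_template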
diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html
index 6a71e655b..854bd85cf 100644
--- a/examples/colab-notebooks/colab-axolotl-example.html
+++ b/examples/colab-notebooks/colab-axolotl-example.html
@@ -567,7 +567,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
    %%capture
     # This step can take ~5-10 minutes to install dependencies
     !pip install --no-build-isolation axolotl[flash-attn]>=0.9.1
    -!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@242b245"
    +!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@318b7e2"

    Demo: Talk Like a Pirate

diff --git a/index.html b/index.html
index 7ac0f932f..36e1471c1 100644
--- a/index.html
+++ b/index.html
@@ -564,7 +564,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

    🎉 Latest Updates

-• 2025/12: Axolotl now includes support for Kimi-Linear, Olmo3, Trinity, and Ministral3.
+• 2025/12: Axolotl now includes support for Kimi-Linear, Plano-Orchestrator, MiMo, InternVL 3.5, Olmo3, Trinity, and Ministral3.
    • 2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.
    • 2025/09: Axolotl now has text diffusion training. Read more here.
    • 2025/08: QAT has been updated to include NVFP4 support. See PR.
    • diff --git a/search.json b/search.json index c97c21770..9c7feab13 100644 --- a/search.json +++ b/search.json @@ -1910,7 +1910,7 @@ "href": "docs/custom_integrations.html#cut-cross-entropy", "title": "Custom Integrations", "section": "Cut Cross Entropy", - "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@242b245\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4v\nglm4v_moe\ngpt_oss\ngranite\ngranitemoe\ngranitemoeshared\ngranitemoehybrid\nhunyuan_v1_dense\nhunyuan_v1_moe\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmixtral\nmllama\nolmo\nolmo2\nolmo3\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\nqwen3_vl\nqwen3_vl_moe\nqwen3_next\nsmollm3\nseed_oss\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", + "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@318b7e2\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4v\nglm4v_moe\ngpt_oss\ngranite\ngranitemoe\ngranitemoeshared\ngranitemoehybrid\nhunyuan_v1_dense\nhunyuan_v1_moe\ninternvl\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmixtral\nmllama\nolmo\nolmo2\nolmo3\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\nqwen3_vl\nqwen3_vl_moe\nqwen3_next\nsmollm3\nseed_oss\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", 
"crumbs": [ "Advanced Features", "Custom Integrations" @@ -2030,7 +2030,7 @@ "href": "index.html#latest-updates", "title": "Axolotl", "section": "🎉 Latest Updates", - "text": "🎉 Latest Updates\n\n2025/12: Axolotl now includes support for Kimi-Linear, Olmo3, Trinity, and Ministral3.\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n\n\n\nExpand older updates\n\n\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See examples to start training your own Magistral models with Axolotl!\n2025/04: Llama 4 support has been added in Axolotl. See examples to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. See docs.", + "text": "🎉 Latest Updates\n\n2025/12: Axolotl now includes support for Kimi-Linear, Plano-Orchestrator, MiMo, InternVL 3.5, Olmo3, Trinity, and Ministral3.\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. 
Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n\n\n\nExpand older updates\n\n\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See examples to start training your own Magistral models with Axolotl!\n2025/04: Llama 4 support has been added in Axolotl. See examples to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. See docs.", "crumbs": [ "Home" ] @@ -3455,7 +3455,7 @@ "href": "docs/multimodal.html", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL\nIntern-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -3466,7 +3466,7 @@ "href": "docs/multimodal.html#supported-models", "title": "MultiModal / Vision Language Models (BETA)", "section": "", - "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL", + "text": "Mllama\nLlama4\nPixtral\nLlava-1.5\nMistral-Small-3.1\nMagistral-Small-2509\nVoxtral\nGemma-3\nGemma-3n\nQwen2-VL\nQwen2.5-VL\nSmolVLM2\nLFM2-VL\nIntern-VL", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" @@ -3477,7 +3477,7 @@ "href": "docs/multimodal.html#usage", "title": "MultiModal / Vision Language Models (BETA)", "section": "Usage", - "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section if specified\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize 
images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder for full configs.\n\n\n\n\n\n\nTip\n\n\n\nSome of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\n\n\n\n\n\nNote\n\n\n\nAs of now, we do not truncate nor drop samples based on sequence_len as each arch has different ways to process non-text tokens. We are looking for help on this.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\n\nMagistral-Small-2509\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Magistral-Small-2509\n\n\nVoxtral\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'\n\n\nbase_model: mistralai/Voxtral-Mini-3B-2507\n\nprocessor_type: VoxtralProcessor\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nQwen3-VL\nbase_model: Qwen/Qwen3-VL-4B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nSmolVLM2\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install num2words via pip3 install num2words==0.5.14\n\n\nbase_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct\n\n\nLFM2-VL\n\n\n\n\n\n\nWarning\n\n\n\nPlease uninstall causal-conv1d via pip3 uninstall -y causal-conv1d\n\n\nbase_model: LiquidAI/LFM2-VL-450M", + "text": "Usage\nMultimodal support is limited and doesn’t have full feature parity.\nHere are the hyperparams you’ll need to use to finetune a multimodal model.\nprocessor_type: AutoProcessor\n\nskip_prepare_dataset: true\nremove_unused_columns: false # leave columns in place as they are needed to handle image embeddings during training\nsample_packing: false # not yet supported with multimodal\n\nchat_template: # see in next section if specified\n\n# example dataset\ndatasets:\n - path: HuggingFaceH4/llava-instruct-mix-vsft\n type: chat_template\n split: train[:1%]\n\n# (optional) if doing lora, only finetune the Language model,\n# leave the vision model and vision tower frozen\n# load_in_8bit: true\nadapter: lora\nlora_target_modules: 'model.language_model.layers.[\\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'\n\n# (optional) if you want to resize images to a set size\nimage_size: 512\nimage_resize_algorithm: bilinear\nPlease see examples folder 
for full configs.\n\n\n\n\n\n\nTip\n\n\n\nSome of our chat_templates have been extended to support broader dataset types. This should not break any existing configs.\n\n\n\n\n\n\n\n\nNote\n\n\n\nAs of now, we do not truncate nor drop samples based on sequence_len as each arch has different ways to process non-text tokens. We are looking for help on this.\n\n\n\nMllama\nbase_model: meta-llama/Llama-3.2-11B-Vision-Instruct\n\nchat_template: llama3_2_vision\n\n\nLlama4\nbase_model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n\nchat_template: llama4\n\n\nPixtral\nbase_model: mistralai/Pixtral-12B-2409\n\nchat_template: pixtral\n\n\nLlava-1.5\nbase_model: llava-hf/llava-1.5-7b-hf\n\nchat_template: llava\n\n\nMistral-Small-3.1\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503\n\n\nMagistral-Small-2509\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install vision lib via pip install 'mistral-common[opencv]==1.8.5'\n\n\nbase_model: mistralai/Magistral-Small-2509\n\n\nVoxtral\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install audio lib via pip3 install librosa==0.11.0 'mistral_common[audio]==1.8.3'\n\n\nbase_model: mistralai/Voxtral-Mini-3B-2507\n\nprocessor_type: VoxtralProcessor\n\n\nGemma-3\n\n\n\n\n\n\nTip\n\n\n\nThe Gemma3-1B model is a text-only model, so please train as regular text model.\n\n\nFor multi-modal 4B/12B/27B models, use the following config:\nbase_model: google/gemma-3-4b-it\n\nchat_template: gemma3\n\n\nGemma-3n\n\n\n\n\n\n\nWarning\n\n\n\nThe model’s initial loss and grad norm will be very high. We suspect this to be due to the Conv in the vision layers.\n\n\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.17\n\n\nbase_model: google/gemma-3n-E2B-it\n\nchat_template: gemma3n\n\n\nQwen2-VL\nbase_model: Qwen/Qwen2-VL-7B-Instruct\n\nchat_template: qwen2_vl\n\n\nQwen2.5-VL\nbase_model: Qwen/Qwen2.5-VL-7B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nQwen3-VL\nbase_model: Qwen/Qwen3-VL-4B-Instruct\n\nchat_template: qwen2_vl # same as qwen2-vl\n\n\nSmolVLM2\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install num2words via pip3 install num2words==0.5.14\n\n\nbase_model: HuggingFaceTB/SmolVLM2-500M-Video-Instruct\n\n\nLFM2-VL\n\n\n\n\n\n\nWarning\n\n\n\nPlease uninstall causal-conv1d via pip3 uninstall -y causal-conv1d\n\n\nbase_model: LiquidAI/LFM2-VL-450M\n\n\nIntern-VL\n\n\n\n\n\n\nTip\n\n\n\nPlease make sure to install timm via pip3 install timm==1.0.19\n\n\nbase_model: OpenGVLab/InternVL3_5-8B", "crumbs": [ "How To Guides", "MultiModal / Vision Language Models (BETA)" diff --git a/sitemap.xml b/sitemap.xml index 728cd3b02..17a64eba2 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,802 +2,802 @@ https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2025-12-25T10:57:19.781Z + 2025-12-25T11:09:57.233Z https://docs.axolotl.ai/docs/mac.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/cli.html - 2025-12-25T10:57:19.742Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/mixed_precision.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/installation.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/dataset_loading.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2025-12-25T10:57:19.749Z + 
2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/optimizations.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2025-12-25T10:57:19.744Z + 2025-12-25T11:09:57.211Z https://docs.axolotl.ai/docs/docker.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/input_output.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/multi-gpu.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/rlhf.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/multi-node.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2025-12-25T10:57:19.742Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2025-12-25T10:57:19.744Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/quantize.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/reward_modelling.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2025-12-25T11:00:54.114Z + 2025-12-25T11:13:17.038Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2025-12-25T11:00:54.973Z + 2025-12-25T11:13:17.866Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2025-12-25T11:00:55.454Z + 2025-12-25T11:13:18.330Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2025-12-25T11:00:55.446Z + 2025-12-25T11:13:18.321Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2025-12-25T11:00:54.113Z + 2025-12-25T11:13:17.037Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2025-12-25T11:00:55.575Z + 2025-12-25T11:13:18.445Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2025-12-25T11:00:55.579Z + 2025-12-25T11:13:18.450Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2025-12-25T11:00:54.295Z + 2025-12-25T11:13:17.214Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2025-12-25T11:00:54.348Z + 2025-12-25T11:13:17.264Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2025-12-25T11:00:54.263Z + 2025-12-25T11:13:17.182Z https://docs.axolotl.ai/docs/api/cli.args.html - 2025-12-25T11:00:54.210Z + 2025-12-25T11:13:17.131Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2025-12-25T11:00:54.041Z + 2025-12-25T11:13:16.968Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2025-12-25T11:00:54.309Z + 2025-12-25T11:13:17.227Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2025-12-25T11:00:54.983Z + 2025-12-25T11:13:17.877Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2025-12-25T11:00:54.491Z + 2025-12-25T11:13:17.402Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2025-12-25T11:00:54.645Z + 2025-12-25T11:13:17.549Z 
https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2025-12-25T11:00:54.401Z + 2025-12-25T11:13:17.315Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2025-12-25T11:00:55.488Z + 2025-12-25T11:13:18.362Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2025-12-25T11:00:54.960Z + 2025-12-25T11:13:17.854Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2025-12-25T11:00:54.592Z + 2025-12-25T11:13:17.498Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2025-12-25T11:00:54.658Z + 2025-12-25T11:13:17.562Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2025-12-25T11:00:55.054Z + 2025-12-25T11:13:17.944Z https://docs.axolotl.ai/docs/api/logging_config.html - 2025-12-25T11:00:54.052Z + 2025-12-25T11:13:16.978Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2025-12-25T11:00:54.964Z + 2025-12-25T11:13:17.858Z https://docs.axolotl.ai/docs/api/cli.art.html - 2025-12-25T11:00:54.214Z + 2025-12-25T11:13:17.134Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2025-12-25T11:00:55.516Z + 2025-12-25T11:13:18.388Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2025-12-25T11:00:54.368Z + 2025-12-25T11:13:17.283Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2025-12-25T11:00:55.441Z + 2025-12-25T11:13:18.316Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2025-12-25T11:00:54.415Z + 2025-12-25T11:13:17.329Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2025-12-25T11:00:54.221Z + 2025-12-25T11:13:17.141Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2025-12-25T11:00:55.468Z + 2025-12-25T11:13:18.342Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2025-12-25T11:00:55.045Z + 2025-12-25T11:13:17.936Z https://docs.axolotl.ai/docs/api/utils.data.streaming.html - 2025-12-25T11:00:55.145Z + 2025-12-25T11:13:18.033Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2025-12-25T11:00:55.029Z + 2025-12-25T11:13:17.920Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2025-12-25T11:00:54.607Z + 2025-12-25T11:13:17.513Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2025-12-25T11:00:54.443Z + 2025-12-25T11:13:17.356Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2025-12-25T11:00:54.708Z + 2025-12-25T11:13:17.610Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2025-12-25T11:00:54.188Z + 2025-12-25T11:13:17.109Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2025-12-25T11:00:54.907Z + 2025-12-25T11:13:17.803Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2025-12-25T11:00:54.707Z + 2025-12-25T11:13:17.608Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2025-12-25T11:00:54.342Z + 2025-12-25T11:13:17.257Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2025-12-25T11:00:55.227Z + 2025-12-25T11:13:18.112Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2025-12-25T11:00:55.487Z + 2025-12-25T11:13:18.361Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2025-12-25T11:00:55.585Z + 2025-12-25T11:13:18.455Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2025-12-25T11:00:54.730Z + 2025-12-25T11:13:17.632Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2025-12-25T11:00:55.198Z + 2025-12-25T11:13:18.084Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2025-12-25T11:00:54.899Z + 2025-12-25T11:13:17.796Z 
https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2025-12-25T11:00:54.953Z + 2025-12-25T11:13:17.848Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2025-12-25T11:00:54.384Z + 2025-12-25T11:13:17.299Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2025-12-25T11:00:54.335Z + 2025-12-25T11:13:17.252Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2025-12-25T11:00:55.243Z + 2025-12-25T11:13:18.125Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2025-12-25T11:00:54.059Z + 2025-12-25T11:13:16.985Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2025-12-25T11:00:54.898Z + 2025-12-25T11:13:17.794Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2025-12-25T11:00:54.320Z + 2025-12-25T11:13:17.237Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2025-12-25T11:00:55.144Z + 2025-12-25T11:13:18.031Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2025-12-25T11:00:55.466Z + 2025-12-25T11:13:18.341Z https://docs.axolotl.ai/docs/api/cli.main.html - 2025-12-25T11:00:54.169Z + 2025-12-25T11:13:17.090Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2025-12-25T11:00:54.858Z + 2025-12-25T11:13:17.755Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2025-12-25T11:00:55.041Z + 2025-12-25T11:13:17.932Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2025-12-25T11:00:55.128Z + 2025-12-25T11:13:18.017Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2025-12-25T11:00:54.454Z + 2025-12-25T11:13:17.366Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2025-12-25T11:00:54.901Z + 2025-12-25T11:13:17.797Z https://docs.axolotl.ai/docs/api/evaluate.html - 2025-12-25T11:00:53.971Z + 2025-12-25T11:13:16.900Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2025-12-25T11:00:54.247Z + 2025-12-25T11:13:17.167Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2025-12-25T11:00:55.074Z + 2025-12-25T11:13:17.964Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2025-12-25T11:00:54.301Z + 2025-12-25T11:13:17.219Z https://docs.axolotl.ai/docs/api/common.const.html - 2025-12-25T11:00:55.469Z + 2025-12-25T11:13:18.344Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2025-12-25T11:00:54.631Z + 2025-12-25T11:13:17.536Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2025-12-25T11:00:55.521Z + 2025-12-25T11:13:18.394Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2025-12-25T11:00:54.122Z + 2025-12-25T11:13:17.046Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2025-12-25T11:00:54.760Z + 2025-12-25T11:13:17.660Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2025-12-25T11:00:54.408Z + 2025-12-25T11:13:17.321Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2025-12-25T11:00:54.065Z + 2025-12-25T11:13:16.990Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2025-12-25T11:00:54.870Z + 2025-12-25T11:13:17.766Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2025-12-25T11:00:55.284Z + 2025-12-25T11:13:18.165Z https://docs.axolotl.ai/docs/custom_integrations.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/index.html - 2025-12-25T10:57:19.774Z + 2025-12-25T11:09:57.229Z https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2025-12-25T10:57:19.755Z + 2025-12-25T11:09:57.217Z https://docs.axolotl.ai/FAQS.html - 2025-12-25T10:57:19.740Z + 
2025-12-25T11:09:57.209Z https://docs.axolotl.ai/docs/inference.html - 2025-12-25T10:57:19.747Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2025-12-25T11:00:54.131Z + 2025-12-25T11:13:17.054Z https://docs.axolotl.ai/docs/api/train.html - 2025-12-25T11:00:53.959Z + 2025-12-25T11:13:16.889Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2025-12-25T11:00:55.206Z + 2025-12-25T11:13:18.092Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2025-12-25T11:00:54.890Z + 2025-12-25T11:13:17.786Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2025-12-25T11:00:55.135Z + 2025-12-25T11:13:18.023Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2025-12-25T11:00:55.175Z + 2025-12-25T11:13:18.061Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2025-12-25T11:00:55.512Z + 2025-12-25T11:13:18.384Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2025-12-25T11:00:54.903Z + 2025-12-25T11:13:17.799Z https://docs.axolotl.ai/docs/api/cli.config.html - 2025-12-25T11:00:54.241Z + 2025-12-25T11:13:17.161Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2025-12-25T11:00:54.498Z + 2025-12-25T11:13:17.408Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2025-12-25T11:00:55.035Z + 2025-12-25T11:13:17.926Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2025-12-25T11:00:54.442Z + 2025-12-25T11:13:17.354Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2025-12-25T11:00:54.662Z + 2025-12-25T11:13:17.566Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2025-12-25T11:00:54.985Z + 2025-12-25T11:13:17.878Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2025-12-25T11:00:54.695Z + 2025-12-25T11:13:17.597Z https://docs.axolotl.ai/docs/api/index.html - 2025-12-25T11:00:53.887Z + 2025-12-25T11:13:16.819Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2025-12-25T11:00:54.536Z + 2025-12-25T11:13:17.445Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2025-12-25T11:00:54.322Z + 2025-12-25T11:13:17.239Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2025-12-25T11:00:54.272Z + 2025-12-25T11:13:17.192Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2025-12-25T11:00:54.891Z + 2025-12-25T11:13:17.788Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2025-12-25T11:00:54.538Z + 2025-12-25T11:13:17.447Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2025-12-25T11:00:55.589Z + 2025-12-25T11:13:18.459Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2025-12-25T11:00:55.249Z + 2025-12-25T11:13:18.131Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2025-12-25T11:00:54.576Z + 2025-12-25T11:13:17.483Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2025-12-25T11:00:55.597Z + 2025-12-25T11:13:18.467Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2025-12-25T11:00:55.567Z + 2025-12-25T11:13:18.438Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2025-12-25T11:00:54.756Z + 2025-12-25T11:13:17.656Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2025-12-25T11:00:54.116Z + 2025-12-25T11:13:17.040Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2025-12-25T11:00:55.490Z + 2025-12-25T11:13:18.363Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2025-12-25T11:00:55.462Z + 2025-12-25T11:13:18.337Z 
https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2025-12-25T11:00:54.428Z + 2025-12-25T11:13:17.341Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2025-12-25T11:00:54.682Z + 2025-12-25T11:13:17.585Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2025-12-25T11:00:54.952Z + 2025-12-25T11:13:17.846Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2025-12-25T11:00:55.278Z + 2025-12-25T11:13:18.159Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2025-12-25T11:00:55.190Z + 2025-12-25T11:13:18.077Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2025-12-25T11:00:54.712Z + 2025-12-25T11:13:17.613Z https://docs.axolotl.ai/docs/api/convert.html - 2025-12-25T11:00:53.994Z + 2025-12-25T11:13:16.922Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2025-12-25T11:00:55.268Z + 2025-12-25T11:13:18.149Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2025-12-25T11:00:54.721Z + 2025-12-25T11:13:17.622Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2025-12-25T11:00:54.710Z + 2025-12-25T11:13:17.612Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2025-12-25T11:00:54.509Z + 2025-12-25T11:13:17.419Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2025-12-25T11:00:54.111Z + 2025-12-25T11:13:17.035Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2025-12-25T11:00:54.617Z + 2025-12-25T11:13:17.522Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2025-12-25T11:00:54.675Z + 2025-12-25T11:13:17.578Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2025-12-25T11:00:54.941Z + 2025-12-25T11:13:17.835Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2025-12-25T11:00:54.650Z + 2025-12-25T11:13:17.554Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2025-12-25T11:00:54.638Z + 2025-12-25T11:13:17.543Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2025-12-25T11:00:54.502Z + 2025-12-25T11:13:17.412Z https://docs.axolotl.ai/docs/api/cli.train.html - 2025-12-25T11:00:54.178Z + 2025-12-25T11:13:17.100Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2025-12-25T11:00:54.286Z + 2025-12-25T11:13:17.205Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2025-12-25T11:00:54.472Z + 2025-12-25T11:13:17.383Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2025-12-25T11:00:54.085Z + 2025-12-25T11:13:17.009Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2025-12-25T11:00:55.581Z + 2025-12-25T11:13:18.451Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2025-12-25T11:00:54.466Z + 2025-12-25T11:13:17.377Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2025-12-25T11:00:54.070Z + 2025-12-25T11:13:16.995Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2025-12-25T11:00:54.950Z + 2025-12-25T11:13:17.844Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2025-12-25T11:00:54.732Z + 2025-12-25T11:13:17.633Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2025-12-25T11:00:55.444Z + 2025-12-25T11:13:18.320Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2025-12-25T11:00:54.881Z + 2025-12-25T11:13:17.778Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2025-12-25T11:00:55.019Z + 
2025-12-25T11:13:17.911Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2025-12-25T11:00:54.313Z + 2025-12-25T11:13:17.230Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2025-12-25T11:00:54.593Z + 2025-12-25T11:13:17.500Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2025-12-25T11:00:54.670Z + 2025-12-25T11:13:17.573Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2025-12-25T11:00:55.239Z + 2025-12-25T11:13:18.121Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2025-12-25T11:00:55.458Z + 2025-12-25T11:13:18.333Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2025-12-25T11:00:54.490Z + 2025-12-25T11:13:17.400Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2025-12-25T11:00:54.971Z + 2025-12-25T11:13:17.865Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2025-12-25T11:00:55.152Z + 2025-12-25T11:13:18.040Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2025-12-25T11:00:54.989Z + 2025-12-25T11:13:17.882Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2025-12-25T11:00:54.909Z + 2025-12-25T11:13:17.804Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2025-12-25T11:00:54.464Z + 2025-12-25T11:13:17.375Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2025-12-25T11:00:55.027Z + 2025-12-25T11:13:17.918Z https://docs.axolotl.ai/docs/api/datasets.html - 2025-12-25T11:00:53.978Z + 2025-12-25T11:13:16.907Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2025-12-25T11:00:54.355Z + 2025-12-25T11:13:17.270Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2025-12-25T11:00:55.105Z + 2025-12-25T11:13:17.994Z https://docs.axolotl.ai/docs/optimizers.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/torchao.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/faq.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/ray-integration.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/multimodal.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/config-reference.html - 2025-12-25T11:01:10.357Z + 2025-12-25T11:13:34.425Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/debugging.html - 2025-12-25T10:57:19.743Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/amd_hpc.html - 2025-12-25T10:57:19.742Z + 2025-12-25T11:09:57.210Z https://docs.axolotl.ai/docs/lora_optims.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/streaming.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/multipack.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/qat.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/lr_groups.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.213Z https://docs.axolotl.ai/docs/getting-started.html - 
2025-12-25T10:57:19.744Z + 2025-12-25T11:09:57.211Z https://docs.axolotl.ai/docs/nccl.html - 2025-12-25T10:57:19.748Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/telemetry.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/docs/unsloth.html - 2025-12-25T10:57:19.749Z + 2025-12-25T11:09:57.214Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2025-12-25T10:57:19.781Z + 2025-12-25T11:09:57.232Z