diff --git a/.nojekyll b/.nojekyll index 1c0898545..2bbcb2af9 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -341745a1 \ No newline at end of file +63c2f49b \ No newline at end of file diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html index 3ee0da140..f3f42daa3 100644 --- a/docs/custom_integrations.html +++ b/docs/custom_integrations.html @@ -619,7 +619,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); -
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"
+
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@f643b88"

Usage

@@ -659,6 +659,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • llama4
  • llama4_text
  • llava
  • +
  • ministral
  • +
  • ministral3
  • mistral
  • mistral3
  • mixtral
  • diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html index 7210cdbd9..32b92e0fd 100644 --- a/examples/colab-notebooks/colab-axolotl-example.html +++ b/examples/colab-notebooks/colab-axolotl-example.html @@ -567,7 +567,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
    %%capture
     # This step can take ~5-10 minutes to install dependencies
     !pip install --no-build-isolation axolotl[flash-attn]>=0.9.1
    -!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953"
    +!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@f643b88"

    Demo: Talk Like a Pirate

    diff --git a/index.html b/index.html index 8a4344dce..8fcffbd21 100644 --- a/index.html +++ b/index.html @@ -564,7 +564,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

    🎉 Latest Updates

      -
    • 2025/11: Axolotl now includes support for Olmo3.
    • +
    • 2025/12: Axolotl now includes support for Olmo3, Trinity, and Ministral3.
    • 2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.
    • 2025/09: Axolotl now has text diffusion training. Read more here.
    • 2025/08: QAT has been updated to include NVFP4 support. See PR.
    • diff --git a/search.json b/search.json index 3e78649e0..ea42006f2 100644 --- a/search.json +++ b/search.json @@ -1910,7 +1910,7 @@ "href": "docs/custom_integrations.html#cut-cross-entropy", "title": "Custom Integrations", "section": "Cut Cross Entropy", - "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@5eff953\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4v\nglm4v_moe\ngpt_oss\ngranite\ngranitemoe\ngranitemoeshared\ngranitemoehybrid\nhunyuan_v1_dense\nhunyuan_v1_moe\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nmistral\nmistral3\nmixtral\nmllama\nolmo\nolmo2\nolmo3\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\nqwen3_vl\nqwen3_vl_moe\nqwen3_next\nsmollm3\nseed_oss\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", + "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@f643b88\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4v\nglm4v_moe\ngpt_oss\ngranite\ngranitemoe\ngranitemoeshared\ngranitemoehybrid\nhunyuan_v1_dense\nhunyuan_v1_moe\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmixtral\nmllama\nolmo\nolmo2\nolmo3\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\nqwen3_vl\nqwen3_vl_moe\nqwen3_next\nsmollm3\nseed_oss\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", "crumbs": [ "Advanced Features", "Custom Integrations" @@ -2030,7 +2030,7 @@ "href": "index.html#latest-updates", "title": "Axolotl", "section": "🎉 Latest Updates", - "text": "🎉 Latest Updates\n\n2025/11: Axolotl now includes support for Olmo3.\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n\n\n\nExpand older updates\n\n\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See examples to start training your own Magistral models with Axolotl!\n2025/04: Llama 4 support has been added in Axolotl. See examples to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. See docs.", + "text": "🎉 Latest Updates\n\n2025/12: Axolotl now includes support for Olmo3, Trinity, and Ministral3.\n2025/10: New model support has been added in Axolotl for: Qwen3 Next, Qwen2.5-vl, Qwen3-vl, Qwen3, Qwen3MoE, Granite 4, HunYuan, Magistral 2509, Apertus, and Seed-OSS.\n2025/09: Axolotl now has text diffusion training. Read more here.\n2025/08: QAT has been updated to include NVFP4 support. See PR.\n2025/07:\n\nND Parallelism support has been added into Axolotl. Compose Context Parallelism (CP), Tensor Parallelism (TP), and Fully Sharded Data Parallelism (FSDP) within a single node and across multiple nodes. Check out the blog post for more info.\nAxolotl adds more models: GPT-OSS, Gemma 3n, Liquid Foundation Model 2 (LFM2), and Arcee Foundation Models (AFM).\nFP8 finetuning with fp8 gather op is now possible in Axolotl via torchao. Get started here!\nVoxtral, Magistral 1.1, and Devstral with mistral-common tokenizer support has been integrated in Axolotl!\nTiledMLP support for single-GPU to multi-GPU training with DDP, DeepSpeed and FSDP support has been added to support Arctic Long Sequence Training. (ALST). See examples for using ALST with Axolotl!\n\n2025/05: Quantization Aware Training (QAT) support has been added to Axolotl. Explore the docs to learn more!\n\n\n\nExpand older updates\n\n\n2025/03: Axolotl has implemented Sequence Parallelism (SP) support. Read the blog and docs to learn how to scale your context length when fine-tuning.\n2025/06: Magistral with mistral-common tokenizer support has been added to Axolotl. See examples to start training your own Magistral models with Axolotl!\n2025/04: Llama 4 support has been added in Axolotl. See examples to start training your own Llama 4 models with Axolotl’s linearized version!\n2025/03: (Beta) Fine-tuning Multimodal models is now supported in Axolotl. Check out the docs to fine-tune your own!\n2025/02: Axolotl has added LoRA optimizations to reduce memory usage and improve training speed for LoRA and QLoRA in single GPU and multi-GPU training (DDP and DeepSpeed). Jump into the docs to give it a try.\n2025/02: Axolotl has added GRPO support. Dive into our blog and GRPO example and have some fun!\n2025/01: Axolotl has added Reward Modelling / Process Reward Modelling fine-tuning support. See docs.", "crumbs": [ "Home" ] diff --git a/sitemap.xml b/sitemap.xml index c9192ae05..b04d5abc3 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,802 +2,802 @@ https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2025-12-02T18:13:06.077Z + 2025-12-04T13:32:19.462Z https://docs.axolotl.ai/docs/mac.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/cli.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/mixed_precision.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/installation.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/dataset_loading.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/optimizations.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2025-12-02T18:13:06.052Z + 2025-12-04T13:32:19.437Z https://docs.axolotl.ai/docs/docker.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/input_output.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/multi-gpu.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/rlhf.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/multi-node.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/quantize.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/reward_modelling.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2025-12-02T18:16:44.069Z + 2025-12-04T13:35:55.473Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2025-12-02T18:16:44.980Z + 2025-12-04T13:35:56.363Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2025-12-02T18:16:45.490Z + 2025-12-04T13:35:56.862Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2025-12-02T18:16:45.481Z + 2025-12-04T13:35:56.853Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2025-12-02T18:16:44.067Z + 2025-12-04T13:35:55.472Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2025-12-02T18:16:45.619Z + 2025-12-04T13:35:56.987Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2025-12-02T18:16:45.624Z + 2025-12-04T13:35:56.992Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2025-12-02T18:16:44.262Z + 2025-12-04T13:35:55.662Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2025-12-02T18:16:44.317Z + 2025-12-04T13:35:55.716Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2025-12-02T18:16:44.227Z + 2025-12-04T13:35:55.628Z https://docs.axolotl.ai/docs/api/cli.args.html - 2025-12-02T18:16:44.171Z + 2025-12-04T13:35:55.573Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2025-12-02T18:16:43.991Z + 2025-12-04T13:35:55.397Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2025-12-02T18:16:44.276Z + 2025-12-04T13:35:55.676Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2025-12-02T18:16:44.991Z + 2025-12-04T13:35:56.374Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2025-12-02T18:16:44.464Z + 2025-12-04T13:35:55.859Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2025-12-02T18:16:44.628Z + 2025-12-04T13:35:56.019Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2025-12-02T18:16:44.375Z + 2025-12-04T13:35:55.772Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2025-12-02T18:16:45.526Z + 2025-12-04T13:35:56.897Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2025-12-02T18:16:44.966Z + 2025-12-04T13:35:56.350Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2025-12-02T18:16:44.571Z + 2025-12-04T13:35:55.964Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2025-12-02T18:16:44.642Z + 2025-12-04T13:35:56.033Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2025-12-02T18:16:45.066Z + 2025-12-04T13:35:56.448Z https://docs.axolotl.ai/docs/api/logging_config.html - 2025-12-02T18:16:44.003Z + 2025-12-04T13:35:55.408Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2025-12-02T18:16:44.970Z + 2025-12-04T13:35:56.354Z https://docs.axolotl.ai/docs/api/cli.art.html - 2025-12-02T18:16:44.175Z + 2025-12-04T13:35:55.577Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2025-12-02T18:16:45.555Z + 2025-12-04T13:35:56.925Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2025-12-02T18:16:44.339Z + 2025-12-04T13:35:55.737Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2025-12-02T18:16:45.476Z + 2025-12-04T13:35:56.847Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2025-12-02T18:16:44.389Z + 2025-12-04T13:35:55.786Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2025-12-02T18:16:44.183Z + 2025-12-04T13:35:55.585Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2025-12-02T18:16:45.504Z + 2025-12-04T13:35:56.876Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2025-12-02T18:16:45.057Z + 2025-12-04T13:35:56.439Z https://docs.axolotl.ai/docs/api/utils.data.streaming.html - 2025-12-02T18:16:45.163Z + 2025-12-04T13:35:56.543Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2025-12-02T18:16:45.040Z + 2025-12-04T13:35:56.422Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2025-12-02T18:16:44.587Z + 2025-12-04T13:35:55.980Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2025-12-02T18:16:44.419Z + 2025-12-04T13:35:55.815Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2025-12-02T18:16:44.696Z + 2025-12-04T13:35:56.085Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2025-12-02T18:16:44.147Z + 2025-12-04T13:35:55.550Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2025-12-02T18:16:44.909Z + 2025-12-04T13:35:56.294Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2025-12-02T18:16:44.694Z + 2025-12-04T13:35:56.083Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2025-12-02T18:16:44.310Z + 2025-12-04T13:35:55.709Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2025-12-02T18:16:45.250Z + 2025-12-04T13:35:56.628Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2025-12-02T18:16:45.525Z + 2025-12-04T13:35:56.895Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2025-12-02T18:16:45.630Z + 2025-12-04T13:35:56.998Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2025-12-02T18:16:44.720Z + 2025-12-04T13:35:56.108Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2025-12-02T18:16:45.219Z + 2025-12-04T13:35:56.598Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2025-12-02T18:16:44.901Z + 2025-12-04T13:35:56.286Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2025-12-02T18:16:44.959Z + 2025-12-04T13:35:56.343Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2025-12-02T18:16:44.356Z + 2025-12-04T13:35:55.754Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2025-12-02T18:16:44.304Z + 2025-12-04T13:35:55.703Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2025-12-02T18:16:45.265Z + 2025-12-04T13:35:56.642Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2025-12-02T18:16:44.010Z + 2025-12-04T13:35:55.416Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2025-12-02T18:16:44.899Z + 2025-12-04T13:35:56.284Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2025-12-02T18:16:44.288Z + 2025-12-04T13:35:55.687Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2025-12-02T18:16:45.162Z + 2025-12-04T13:35:56.541Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2025-12-02T18:16:45.502Z + 2025-12-04T13:35:56.874Z https://docs.axolotl.ai/docs/api/cli.main.html - 2025-12-02T18:16:44.127Z + 2025-12-04T13:35:55.530Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2025-12-02T18:16:44.856Z + 2025-12-04T13:35:56.242Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2025-12-02T18:16:45.052Z + 2025-12-04T13:35:56.435Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2025-12-02T18:16:45.145Z + 2025-12-04T13:35:56.526Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2025-12-02T18:16:44.431Z + 2025-12-04T13:35:55.827Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2025-12-02T18:16:44.903Z + 2025-12-04T13:35:56.287Z https://docs.axolotl.ai/docs/api/evaluate.html - 2025-12-02T18:16:43.916Z + 2025-12-04T13:35:55.323Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2025-12-02T18:16:44.210Z + 2025-12-04T13:35:55.611Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2025-12-02T18:16:45.087Z + 2025-12-04T13:35:56.469Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2025-12-02T18:16:44.268Z + 2025-12-04T13:35:55.668Z https://docs.axolotl.ai/docs/api/common.const.html - 2025-12-02T18:16:45.506Z + 2025-12-04T13:35:56.877Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2025-12-02T18:16:44.613Z + 2025-12-04T13:35:56.005Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2025-12-02T18:16:45.561Z + 2025-12-04T13:35:56.931Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2025-12-02T18:16:44.077Z + 2025-12-04T13:35:55.481Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2025-12-02T18:16:44.751Z + 2025-12-04T13:35:56.139Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2025-12-02T18:16:44.381Z + 2025-12-04T13:35:55.778Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2025-12-02T18:16:44.016Z + 2025-12-04T13:35:55.421Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2025-12-02T18:16:44.869Z + 2025-12-04T13:35:56.254Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2025-12-02T18:16:45.307Z + 2025-12-04T13:35:56.683Z https://docs.axolotl.ai/docs/custom_integrations.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/index.html - 2025-12-02T18:13:06.072Z + 2025-12-04T13:32:19.458Z https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2025-12-02T18:13:06.059Z + 2025-12-04T13:32:19.444Z https://docs.axolotl.ai/FAQS.html - 2025-12-02T18:13:06.049Z + 2025-12-04T13:32:19.434Z https://docs.axolotl.ai/docs/inference.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2025-12-02T18:16:44.086Z + 2025-12-04T13:35:55.490Z https://docs.axolotl.ai/docs/api/train.html - 2025-12-02T18:16:43.903Z + 2025-12-04T13:35:55.310Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2025-12-02T18:16:45.228Z + 2025-12-04T13:35:56.606Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2025-12-02T18:16:44.891Z + 2025-12-04T13:35:56.275Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2025-12-02T18:16:45.152Z + 2025-12-04T13:35:56.532Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2025-12-02T18:16:45.195Z + 2025-12-04T13:35:56.574Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2025-12-02T18:16:45.551Z + 2025-12-04T13:35:56.921Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2025-12-02T18:16:44.905Z + 2025-12-04T13:35:56.289Z https://docs.axolotl.ai/docs/api/cli.config.html - 2025-12-02T18:16:44.204Z + 2025-12-04T13:35:55.606Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2025-12-02T18:16:44.471Z + 2025-12-04T13:35:55.866Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2025-12-02T18:16:45.046Z + 2025-12-04T13:35:56.428Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2025-12-02T18:16:44.417Z + 2025-12-04T13:35:55.813Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2025-12-02T18:16:44.647Z + 2025-12-04T13:35:56.037Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2025-12-02T18:16:44.993Z + 2025-12-04T13:35:56.376Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2025-12-02T18:16:44.681Z + 2025-12-04T13:35:56.071Z https://docs.axolotl.ai/docs/api/index.html - 2025-12-02T18:16:43.824Z + 2025-12-04T13:35:55.233Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2025-12-02T18:16:44.512Z + 2025-12-04T13:35:55.906Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2025-12-02T18:16:44.290Z + 2025-12-04T13:35:55.689Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2025-12-02T18:16:44.238Z + 2025-12-04T13:35:55.638Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2025-12-02T18:16:44.892Z + 2025-12-04T13:35:56.277Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2025-12-02T18:16:44.514Z + 2025-12-04T13:35:55.908Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2025-12-02T18:16:45.634Z + 2025-12-04T13:35:57.002Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2025-12-02T18:16:45.272Z + 2025-12-04T13:35:56.649Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2025-12-02T18:16:44.554Z + 2025-12-04T13:35:55.947Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2025-12-02T18:16:45.643Z + 2025-12-04T13:35:57.010Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2025-12-02T18:16:45.611Z + 2025-12-04T13:35:56.980Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2025-12-02T18:16:44.747Z + 2025-12-04T13:35:56.135Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2025-12-02T18:16:44.071Z + 2025-12-04T13:35:55.475Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2025-12-02T18:16:45.528Z + 2025-12-04T13:35:56.898Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2025-12-02T18:16:45.498Z + 2025-12-04T13:35:56.870Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2025-12-02T18:16:44.403Z + 2025-12-04T13:35:55.799Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2025-12-02T18:16:44.669Z + 2025-12-04T13:35:56.058Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2025-12-02T18:16:44.957Z + 2025-12-04T13:35:56.341Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2025-12-02T18:16:45.300Z + 2025-12-04T13:35:56.677Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2025-12-02T18:16:45.211Z + 2025-12-04T13:35:56.589Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2025-12-02T18:16:44.700Z + 2025-12-04T13:35:56.089Z https://docs.axolotl.ai/docs/api/convert.html - 2025-12-02T18:16:43.940Z + 2025-12-04T13:35:55.347Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2025-12-02T18:16:45.289Z + 2025-12-04T13:35:56.666Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2025-12-02T18:16:44.710Z + 2025-12-04T13:35:56.098Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2025-12-02T18:16:44.698Z + 2025-12-04T13:35:56.087Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2025-12-02T18:16:44.483Z + 2025-12-04T13:35:55.878Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2025-12-02T18:16:44.066Z + 2025-12-04T13:35:55.470Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2025-12-02T18:16:44.597Z + 2025-12-04T13:35:55.989Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2025-12-02T18:16:44.660Z + 2025-12-04T13:35:56.050Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2025-12-02T18:16:44.946Z + 2025-12-04T13:35:56.329Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2025-12-02T18:16:44.634Z + 2025-12-04T13:35:56.025Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2025-12-02T18:16:44.621Z + 2025-12-04T13:35:56.012Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2025-12-02T18:16:44.475Z + 2025-12-04T13:35:55.870Z https://docs.axolotl.ai/docs/api/cli.train.html - 2025-12-02T18:16:44.137Z + 2025-12-04T13:35:55.540Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2025-12-02T18:16:44.252Z + 2025-12-04T13:35:55.652Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2025-12-02T18:16:44.450Z + 2025-12-04T13:35:55.845Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2025-12-02T18:16:44.037Z + 2025-12-04T13:35:55.442Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2025-12-02T18:16:45.626Z + 2025-12-04T13:35:56.994Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2025-12-02T18:16:44.443Z + 2025-12-04T13:35:55.838Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2025-12-02T18:16:44.022Z + 2025-12-04T13:35:55.427Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2025-12-02T18:16:44.955Z + 2025-12-04T13:35:56.339Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2025-12-02T18:16:44.721Z + 2025-12-04T13:35:56.110Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2025-12-02T18:16:45.480Z + 2025-12-04T13:35:56.851Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2025-12-02T18:16:44.882Z + 2025-12-04T13:35:56.267Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2025-12-02T18:16:45.029Z + 2025-12-04T13:35:56.412Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2025-12-02T18:16:44.280Z + 2025-12-04T13:35:55.680Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2025-12-02T18:16:44.573Z + 2025-12-04T13:35:55.965Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2025-12-02T18:16:44.655Z + 2025-12-04T13:35:56.045Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2025-12-02T18:16:45.261Z + 2025-12-04T13:35:56.638Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2025-12-02T18:16:45.494Z + 2025-12-04T13:35:56.866Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2025-12-02T18:16:44.462Z + 2025-12-04T13:35:55.857Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2025-12-02T18:16:44.978Z + 2025-12-04T13:35:56.361Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2025-12-02T18:16:45.171Z + 2025-12-04T13:35:56.551Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2025-12-02T18:16:44.997Z + 2025-12-04T13:35:56.380Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2025-12-02T18:16:44.911Z + 2025-12-04T13:35:56.295Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2025-12-02T18:16:44.441Z + 2025-12-04T13:35:55.837Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2025-12-02T18:16:45.038Z + 2025-12-04T13:35:56.420Z https://docs.axolotl.ai/docs/api/datasets.html - 2025-12-02T18:16:43.923Z + 2025-12-04T13:35:55.330Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2025-12-02T18:16:44.324Z + 2025-12-04T13:35:55.723Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2025-12-02T18:16:45.121Z + 2025-12-04T13:35:56.501Z https://docs.axolotl.ai/docs/optimizers.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/torchao.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/faq.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/ray-integration.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/multimodal.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/config-reference.html - 2025-12-02T18:17:01.243Z + 2025-12-04T13:36:12.459Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/debugging.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/amd_hpc.html - 2025-12-02T18:13:06.051Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/lora_optims.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/streaming.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/multipack.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/qat.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/lr_groups.html - 2025-12-02T18:13:06.054Z + 2025-12-04T13:32:19.439Z https://docs.axolotl.ai/docs/getting-started.html - 2025-12-02T18:13:06.052Z + 2025-12-04T13:32:19.436Z https://docs.axolotl.ai/docs/nccl.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/telemetry.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/docs/unsloth.html - 2025-12-02T18:13:06.055Z + 2025-12-04T13:32:19.440Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2025-12-02T18:13:06.077Z + 2025-12-04T13:32:19.462Z