diff --git a/.nojekyll b/.nojekyll index a73629108..576a9293d 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -4f9512c6 \ No newline at end of file +c00becba \ No newline at end of file diff --git a/docs/api/utils.callbacks.profiler.html b/docs/api/utils.callbacks.profiler.html index dde9921bc..4cc5a4585 100644 --- a/docs/api/utils.callbacks.profiler.html +++ b/docs/api/utils.callbacks.profiler.html @@ -797,6 +797,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); profiler_steps_start=0, )

PyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.

+

Also runs torch.profiler to produce a Chrome trace for timing analysis.

diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html index e8c9dac3e..18d68b382 100644 --- a/docs/custom_integrations.html +++ b/docs/custom_integrations.html @@ -963,7 +963,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true}); -
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe"
+
pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@63b15e6"

Usage

diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html index 1eacd9278..6fb778c94 100644 --- a/examples/colab-notebooks/colab-axolotl-example.html +++ b/examples/colab-notebooks/colab-axolotl-example.html @@ -810,7 +810,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
%%capture
 # This step can take ~5-10 minutes to install dependencies
 !pip install --no-build-isolation axolotl[flash-attn]>=0.9.1
-!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe"
+!pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@63b15e6"

Demo: Talk Like a Pirate

diff --git a/search.json b/search.json index 19e3e11c0..a8f8106f0 100644 --- a/search.json +++ b/search.json @@ -2558,7 +2558,7 @@ "href": "docs/custom_integrations.html#cut-cross-entropy", "title": "Custom Integrations", "section": "Cut Cross Entropy", - "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@fa9a7fe\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\nafmoe\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\nexaone4\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4_moe_lite\nglm46v\nglm4v\nglm4v_moe\nglm_image\nglm_moe_dsa\ngpt_oss\ngranite\ngranitemoe\ngranitemoehybrid\ngranitemoeshared\nhunyuan_v1_dense\nhunyuan_v1_moe\ninternvl\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmistral4\nmixtral\nmllama\nnemotron_h\nolmo\nolmo2\nolmo3\nolmoe\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_5_vl\nqwen2_moe\nqwen2_vl\nqwen3\nqwen3_5\nqwen3_5_text\nqwen3_5_moe\nqwen3_5_moe_text\nqwen3_moe\nqwen3_next\nqwen3_vl\nqwen3_vl_moe\nseed_oss\nsmollm3\nstep3p5\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", + "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@63b15e6\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\nafmoe\napertus\narcee\ncohere\ncohere2\ndeepseek_v3\nexaone4\ngemma\ngemma2\ngemma3\ngemma3_text\ngemma3n\ngemma3n_text\nglm\nglm4\nglm4_moe\nglm4_moe_lite\nglm46v\nglm4v\nglm4v_moe\nglm_image\nglm_moe_dsa\ngpt_oss\ngranite\ngranitemoe\ngranitemoehybrid\ngranitemoeshared\nhunyuan_v1_dense\nhunyuan_v1_moe\ninternvl\nkimi_linear\nlfm2\nlfm2_moe\nlfm2_vl\nllama\nllama4\nllama4_text\nllava\nministral\nministral3\nmistral\nmistral3\nmistral4\nmixtral\nmllama\nnemotron_h\nolmo\nolmo2\nolmo3\nolmoe\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_5_vl\nqwen2_moe\nqwen2_vl\nqwen3\nqwen3_5\nqwen3_5_text\nqwen3_5_moe\nqwen3_5_moe_text\nqwen3_moe\nqwen3_next\nqwen3_vl\nqwen3_vl_moe\nseed_oss\nsmollm3\nstep3p5\nvoxtral\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", "crumbs": [ "Advanced Features", "Custom Integrations" @@ -4075,14 +4075,14 @@ "href": "docs/api/utils.callbacks.profiler.html", "title": "utils.callbacks.profiler", "section": "", - "text": "utils.callbacks.profiler\nHF Trainer callback for creating pytorch profiling snapshots\n\n\n\n\n\nName\nDescription\n\n\n\n\nPytorchProfilerCallback\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\n\n\n\n\n\nutils.callbacks.profiler.PytorchProfilerCallback(\n steps_to_profile=5,\n profiler_steps_start=0,\n)\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps." + "text": "utils.callbacks.profiler\nHF Trainer callback for creating pytorch profiling snapshots\n\n\n\n\n\nName\nDescription\n\n\n\n\nPytorchProfilerCallback\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\n\n\n\n\n\nutils.callbacks.profiler.PytorchProfilerCallback(\n steps_to_profile=5,\n profiler_steps_start=0,\n)\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\nAlso runs torch.profiler to produce a Chrome trace for timing analysis." }, { "objectID": "docs/api/utils.callbacks.profiler.html#classes", "href": "docs/api/utils.callbacks.profiler.html#classes", "title": "utils.callbacks.profiler", "section": "", - "text": "Name\nDescription\n\n\n\n\nPytorchProfilerCallback\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\n\n\n\n\n\nutils.callbacks.profiler.PytorchProfilerCallback(\n steps_to_profile=5,\n profiler_steps_start=0,\n)\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps." + "text": "Name\nDescription\n\n\n\n\nPytorchProfilerCallback\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\n\n\n\n\n\nutils.callbacks.profiler.PytorchProfilerCallback(\n steps_to_profile=5,\n profiler_steps_start=0,\n)\nPyTorch Profiler callback to create snapshots of GPU memory usage at specified steps.\nAlso runs torch.profiler to produce a Chrome trace for timing analysis." }, { "objectID": "docs/api/kernels.lora.html", diff --git a/sitemap.xml b/sitemap.xml index defeba787..946fa595b 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,954 +2,954 @@ https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2026-03-19T12:29:57.767Z + 2026-03-20T03:08:46.048Z https://docs.axolotl.ai/src/axolotl/core/trainers/grpo/async_trainer.html - 2026-03-19T12:29:57.788Z + 2026-03-20T03:08:46.072Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2026-03-19T12:29:57.789Z + 2026-03-20T03:08:46.073Z https://docs.axolotl.ai/docs/faq.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/debugging.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/models/ministral3/vision.html - 2026-03-19T12:33:45.401Z + 2026-03-20T03:12:41.324Z https://docs.axolotl.ai/docs/models/ministral.html - 2026-03-19T12:33:45.403Z + 2026-03-20T03:12:41.327Z https://docs.axolotl.ai/docs/models/qwen3.html - 2026-03-19T12:33:45.406Z + 2026-03-20T03:12:41.331Z https://docs.axolotl.ai/docs/models/magistral.html - 2026-03-19T12:33:45.402Z + 2026-03-20T03:12:41.326Z https://docs.axolotl.ai/docs/models/voxtral.html - 2026-03-19T12:33:45.404Z + 2026-03-20T03:12:41.328Z https://docs.axolotl.ai/docs/models/llama-4.html - 2026-03-19T12:33:45.405Z + 2026-03-20T03:12:41.330Z https://docs.axolotl.ai/docs/models/magistral/think.html - 2026-03-19T12:33:45.402Z + 2026-03-20T03:12:41.326Z https://docs.axolotl.ai/docs/models/ministral3.html - 2026-03-19T12:33:45.400Z + 2026-03-20T03:12:41.324Z https://docs.axolotl.ai/docs/models/arcee.html - 2026-03-19T12:33:45.399Z + 2026-03-20T03:12:41.323Z https://docs.axolotl.ai/docs/models/gemma3n.html - 2026-03-19T12:33:45.406Z + 2026-03-20T03:12:41.331Z https://docs.axolotl.ai/docs/models/plano.html - 2026-03-19T12:33:45.397Z + 2026-03-20T03:12:41.320Z https://docs.axolotl.ai/docs/models/apertus.html - 2026-03-19T12:33:45.406Z + 2026-03-20T03:12:41.332Z https://docs.axolotl.ai/docs/models/phi.html - 2026-03-19T12:33:45.408Z + 2026-03-20T03:12:41.334Z https://docs.axolotl.ai/docs/models/hunyuan.html - 2026-03-19T12:33:45.409Z + 2026-03-20T03:12:41.361Z https://docs.axolotl.ai/docs/models/trinity.html - 2026-03-19T12:33:45.399Z + 2026-03-20T03:12:41.322Z https://docs.axolotl.ai/docs/models/mistral.html - 2026-03-19T12:33:45.405Z + 2026-03-20T03:12:41.329Z https://docs.axolotl.ai/docs/models/LiquidAI.html - 2026-03-19T12:33:45.408Z + 2026-03-20T03:12:41.360Z https://docs.axolotl.ai/docs/multimodal.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/multi-gpu.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/nccl.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/reward_modelling.html - 2026-03-19T12:29:57.762Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/torchao.html - 2026-03-19T12:29:57.763Z + 2026-03-20T03:08:46.043Z https://docs.axolotl.ai/docs/multipack.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2026-03-19T12:33:23.220Z + 2026-03-20T03:12:18.612Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2026-03-19T12:33:23.094Z + 2026-03-20T03:12:18.488Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2026-03-19T12:33:23.580Z + 2026-03-20T03:12:18.966Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2026-03-19T12:33:23.219Z + 2026-03-20T03:12:18.610Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2026-03-19T12:33:22.728Z + 2026-03-20T03:12:18.123Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2026-03-19T12:33:22.990Z + 2026-03-20T03:12:18.384Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2026-03-19T12:33:23.411Z + 2026-03-20T03:12:18.801Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2026-03-19T12:33:23.705Z + 2026-03-20T03:12:19.090Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2026-03-19T12:33:24.190Z + 2026-03-20T03:12:19.571Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2026-03-19T12:33:23.859Z + 2026-03-20T03:12:19.240Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2026-03-19T12:33:23.530Z + 2026-03-20T03:12:18.917Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2026-03-19T12:33:23.593Z + 2026-03-20T03:12:18.979Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2026-03-19T12:33:22.752Z + 2026-03-20T03:12:18.147Z https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html - 2026-03-19T12:33:22.710Z + 2026-03-20T03:12:18.105Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2026-03-19T12:33:24.034Z + 2026-03-20T03:12:19.415Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2026-03-19T12:33:23.695Z + 2026-03-20T03:12:19.080Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2026-03-19T12:33:23.441Z + 2026-03-20T03:12:18.830Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 2026-03-19T12:33:23.756Z + 2026-03-20T03:12:19.141Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2026-03-19T12:33:22.781Z + 2026-03-20T03:12:18.176Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 2026-03-19T12:33:24.106Z + 2026-03-20T03:12:19.487Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2026-03-19T12:33:23.157Z + 2026-03-20T03:12:18.549Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2026-03-19T12:33:24.057Z + 2026-03-20T03:12:19.438Z https://docs.axolotl.ai/docs/api/cli.config.html - 2026-03-19T12:33:22.704Z + 2026-03-20T03:12:18.099Z https://docs.axolotl.ai/docs/api/evaluate.html - 2026-03-19T12:33:22.406Z + 2026-03-20T03:12:17.800Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2026-03-19T12:33:23.774Z + 2026-03-20T03:12:19.158Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2026-03-19T12:33:22.859Z + 2026-03-20T03:12:18.253Z https://docs.axolotl.ai/docs/api/cli.utils.args.html - 2026-03-19T12:33:22.805Z + 2026-03-20T03:12:18.200Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2026-03-19T12:33:22.562Z + 2026-03-20T03:12:17.956Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2026-03-19T12:33:23.224Z + 2026-03-20T03:12:18.616Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2026-03-19T12:33:23.184Z + 2026-03-20T03:12:18.576Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2026-03-19T12:33:23.445Z + 2026-03-20T03:12:18.834Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2026-03-19T12:33:24.199Z + 2026-03-20T03:12:19.579Z https://docs.axolotl.ai/docs/api/cli.art.html - 2026-03-19T12:33:22.674Z + 2026-03-20T03:12:18.069Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2026-03-19T12:33:24.035Z + 2026-03-20T03:12:19.416Z https://docs.axolotl.ai/docs/api/datasets.html - 2026-03-19T12:33:22.414Z + 2026-03-20T03:12:17.808Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2026-03-19T12:33:23.234Z + 2026-03-20T03:12:18.626Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2026-03-19T12:33:23.245Z + 2026-03-20T03:12:18.636Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2026-03-19T12:33:23.032Z + 2026-03-20T03:12:18.426Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2026-03-19T12:33:22.911Z + 2026-03-20T03:12:18.305Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2026-03-19T12:33:24.167Z + 2026-03-20T03:12:19.547Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2026-03-19T12:33:22.893Z + 2026-03-20T03:12:18.287Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2026-03-19T12:33:23.532Z + 2026-03-20T03:12:18.919Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2026-03-19T12:33:23.819Z + 2026-03-20T03:12:19.202Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2026-03-19T12:33:23.586Z + 2026-03-20T03:12:18.972Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2026-03-19T12:33:22.564Z + 2026-03-20T03:12:17.958Z https://docs.axolotl.ai/docs/api/cli.train.html - 2026-03-19T12:33:22.635Z + 2026-03-20T03:12:18.027Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2026-03-19T12:33:23.629Z + 2026-03-20T03:12:19.015Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2026-03-19T12:33:23.439Z + 2026-03-20T03:12:18.828Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2026-03-19T12:33:22.503Z + 2026-03-20T03:12:17.897Z https://docs.axolotl.ai/docs/api/cli.utils.fetch.html - 2026-03-19T12:33:22.812Z + 2026-03-20T03:12:18.207Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2026-03-19T12:33:23.578Z + 2026-03-20T03:12:18.964Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2026-03-19T12:33:22.878Z + 2026-03-20T03:12:18.272Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2026-03-19T12:33:22.682Z + 2026-03-20T03:12:18.077Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2026-03-19T12:33:23.247Z + 2026-03-20T03:12:18.638Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2026-03-19T12:33:23.435Z + 2026-03-20T03:12:18.824Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2026-03-19T12:33:23.447Z + 2026-03-20T03:12:18.836Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2026-03-19T12:33:22.791Z + 2026-03-20T03:12:18.186Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2026-03-19T12:33:24.059Z + 2026-03-20T03:12:19.440Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2026-03-19T12:33:23.178Z + 2026-03-20T03:12:18.571Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2026-03-19T12:33:23.508Z + 2026-03-20T03:12:18.896Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2026-03-19T12:33:23.399Z + 2026-03-20T03:12:18.788Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2026-03-19T12:33:23.663Z + 2026-03-20T03:12:19.048Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2026-03-19T12:33:23.536Z + 2026-03-20T03:12:18.923Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2026-03-19T12:33:24.053Z + 2026-03-20T03:12:19.434Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2026-03-19T12:33:23.120Z + 2026-03-20T03:12:18.513Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2026-03-19T12:33:22.768Z + 2026-03-20T03:12:18.163Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2026-03-19T12:33:23.222Z + 2026-03-20T03:12:18.614Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2026-03-19T12:33:23.493Z + 2026-03-20T03:12:18.881Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2026-03-19T12:33:23.739Z + 2026-03-20T03:12:19.123Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2026-03-19T12:33:22.981Z + 2026-03-20T03:12:18.375Z https://docs.axolotl.ai/docs/api/cli.utils.load.html - 2026-03-19T12:33:22.819Z + 2026-03-20T03:12:18.214Z https://docs.axolotl.ai/docs/api/common.const.html - 2026-03-19T12:33:24.061Z + 2026-03-20T03:12:19.442Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2026-03-19T12:33:22.582Z + 2026-03-20T03:12:17.976Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2026-03-19T12:33:23.811Z + 2026-03-20T03:12:19.196Z https://docs.axolotl.ai/docs/api/cli.main.html - 2026-03-19T12:33:22.624Z + 2026-03-20T03:12:18.017Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/checkpoint_saving.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/qat.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/custom_integrations.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/rlhf.html - 2026-03-19T12:29:57.762Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/quantize.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/dataset_loading.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/nd_parallelism.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/input_output.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/multi-node.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/getting-started.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/optimizers.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/index.html - 2026-03-19T12:29:57.783Z + 2026-03-20T03:08:46.067Z https://docs.axolotl.ai/docs/mac.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/optimizations.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/lr_groups.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/amd_hpc.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/mixed_precision.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/unsloth.html - 2026-03-19T12:29:57.763Z + 2026-03-20T03:08:46.043Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2026-03-19T12:29:57.763Z + 2026-03-20T03:08:46.043Z https://docs.axolotl.ai/docs/streaming.html - 2026-03-19T12:29:57.763Z + 2026-03-20T03:08:46.043Z https://docs.axolotl.ai/docs/ray-integration.html - 2026-03-19T12:29:57.762Z + 2026-03-20T03:08:46.041Z https://docs.axolotl.ai/docs/config-reference.html - 2026-03-19T12:33:44.509Z + 2026-03-20T03:12:40.256Z https://docs.axolotl.ai/docs/telemetry.html - 2026-03-19T12:29:57.763Z + 2026-03-20T03:08:46.043Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/api/index.html - 2026-03-19T12:33:22.315Z + 2026-03-20T03:12:17.709Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2026-03-19T12:33:23.277Z + 2026-03-20T03:12:18.668Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2026-03-19T12:33:22.566Z + 2026-03-20T03:12:17.960Z https://docs.axolotl.ai/docs/api/cli.args.html - 2026-03-19T12:33:22.670Z + 2026-03-20T03:12:18.065Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2026-03-19T12:33:24.081Z + 2026-03-20T03:12:19.462Z https://docs.axolotl.ai/docs/api/convert.html - 2026-03-19T12:33:22.431Z + 2026-03-20T03:12:17.825Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2026-03-19T12:33:23.003Z + 2026-03-20T03:12:18.397Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2026-03-19T12:33:22.789Z + 2026-03-20T03:12:18.184Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2026-03-19T12:33:22.994Z + 2026-03-20T03:12:18.389Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2026-03-19T12:33:22.483Z + 2026-03-20T03:12:17.878Z https://docs.axolotl.ai/docs/api/utils.data.streaming.html - 2026-03-19T12:33:23.707Z + 2026-03-20T03:12:19.092Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2026-03-19T12:33:23.569Z + 2026-03-20T03:12:18.956Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2026-03-19T12:33:24.045Z + 2026-03-20T03:12:19.426Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2026-03-19T12:33:23.607Z + 2026-03-20T03:12:18.994Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2026-03-19T12:33:24.186Z + 2026-03-20T03:12:19.566Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2026-03-19T12:33:24.179Z + 2026-03-20T03:12:19.560Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2026-03-19T12:33:23.386Z + 2026-03-20T03:12:18.775Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2026-03-19T12:33:23.516Z + 2026-03-20T03:12:18.904Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2026-03-19T12:33:23.170Z + 2026-03-20T03:12:18.562Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2026-03-19T12:33:23.093Z + 2026-03-20T03:12:18.486Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2026-03-19T12:33:22.572Z + 2026-03-20T03:12:17.966Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2026-03-19T12:33:22.645Z + 2026-03-20T03:12:18.037Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2026-03-19T12:33:23.110Z + 2026-03-20T03:12:18.503Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2026-03-19T12:33:23.273Z + 2026-03-20T03:12:18.664Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2026-03-19T12:33:23.841Z + 2026-03-20T03:12:19.222Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2026-03-19T12:33:24.116Z + 2026-03-20T03:12:19.497Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2026-03-19T12:33:24.175Z + 2026-03-20T03:12:19.555Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2026-03-19T12:33:23.075Z + 2026-03-20T03:12:18.469Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2026-03-19T12:33:23.428Z + 2026-03-20T03:12:18.817Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2026-03-19T12:33:22.777Z + 2026-03-20T03:12:18.172Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2026-03-19T12:33:22.884Z + 2026-03-20T03:12:18.279Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2026-03-19T12:33:23.597Z + 2026-03-20T03:12:18.984Z https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html - 2026-03-19T12:33:22.826Z + 2026-03-20T03:12:18.221Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2026-03-19T12:33:22.738Z + 2026-03-20T03:12:18.133Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2026-03-19T12:33:22.939Z + 2026-03-20T03:12:18.334Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2026-03-19T12:33:22.762Z + 2026-03-20T03:12:18.157Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2026-03-19T12:33:24.181Z + 2026-03-20T03:12:19.562Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2026-03-19T12:33:23.165Z + 2026-03-20T03:12:18.558Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2026-03-19T12:33:23.852Z + 2026-03-20T03:12:19.233Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2026-03-19T12:33:23.426Z + 2026-03-20T03:12:18.815Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2026-03-19T12:33:23.765Z + 2026-03-20T03:12:19.149Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2026-03-19T12:33:24.083Z + 2026-03-20T03:12:19.464Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2026-03-19T12:33:22.515Z + 2026-03-20T03:12:17.909Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2026-03-19T12:33:22.509Z + 2026-03-20T03:12:17.903Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2026-03-19T12:33:23.689Z + 2026-03-20T03:12:19.073Z https://docs.axolotl.ai/docs/api/train.html - 2026-03-19T12:33:22.393Z + 2026-03-20T03:12:17.787Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2026-03-19T12:33:23.192Z + 2026-03-20T03:12:18.584Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2026-03-19T12:33:24.030Z + 2026-03-20T03:12:19.411Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2026-03-19T12:33:22.560Z + 2026-03-20T03:12:17.954Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2026-03-19T12:33:22.926Z + 2026-03-20T03:12:18.320Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2026-03-19T12:33:23.205Z + 2026-03-20T03:12:18.597Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2026-03-19T12:33:24.049Z + 2026-03-20T03:12:19.430Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2026-03-19T12:33:23.518Z + 2026-03-20T03:12:18.906Z https://docs.axolotl.ai/docs/api/logging_config.html - 2026-03-19T12:33:22.495Z + 2026-03-20T03:12:17.890Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2026-03-19T12:33:24.080Z + 2026-03-20T03:12:19.461Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2026-03-19T12:33:23.437Z + 2026-03-20T03:12:18.826Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2026-03-19T12:33:23.136Z + 2026-03-20T03:12:18.529Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2026-03-19T12:33:23.796Z + 2026-03-20T03:12:19.180Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2026-03-19T12:33:23.504Z + 2026-03-20T03:12:18.892Z https://docs.axolotl.ai/docs/api/cli.utils.train.html - 2026-03-19T12:33:22.841Z + 2026-03-20T03:12:18.235Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2026-03-19T12:33:22.983Z + 2026-03-20T03:12:18.377Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2026-03-19T12:33:23.143Z + 2026-03-20T03:12:18.536Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2026-03-19T12:33:22.950Z + 2026-03-20T03:12:18.345Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2026-03-19T12:33:22.531Z + 2026-03-20T03:12:17.925Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2026-03-19T12:33:22.952Z + 2026-03-20T03:12:18.347Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2026-03-19T12:33:23.034Z + 2026-03-20T03:12:18.428Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2026-03-19T12:33:24.111Z + 2026-03-20T03:12:19.491Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2026-03-19T12:33:23.483Z + 2026-03-20T03:12:18.871Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2026-03-19T12:33:23.807Z + 2026-03-20T03:12:19.191Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2026-03-19T12:33:23.715Z + 2026-03-20T03:12:19.099Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2026-03-19T12:33:22.927Z + 2026-03-20T03:12:18.322Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2026-03-19T12:33:23.497Z + 2026-03-20T03:12:18.885Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2026-03-19T12:33:23.495Z + 2026-03-20T03:12:18.883Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2026-03-19T12:33:22.959Z + 2026-03-20T03:12:18.354Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2026-03-19T12:33:23.151Z + 2026-03-20T03:12:18.544Z https://docs.axolotl.ai/docs/attention.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/docker.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/gradient_checkpointing.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/cli.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.035Z https://docs.axolotl.ai/docs/lora_optims.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/models/index.html - 2026-03-19T12:33:45.410Z + 2026-03-20T03:12:41.362Z https://docs.axolotl.ai/docs/models/devstral.html - 2026-03-19T12:33:45.404Z + 2026-03-20T03:12:41.329Z https://docs.axolotl.ai/docs/models/mistral-small.html - 2026-03-19T12:33:45.404Z + 2026-03-20T03:12:41.327Z https://docs.axolotl.ai/docs/models/jamba.html - 2026-03-19T12:33:45.409Z + 2026-03-20T03:12:41.361Z https://docs.axolotl.ai/docs/models/olmo3.html - 2026-03-19T12:33:45.398Z + 2026-03-20T03:12:41.322Z https://docs.axolotl.ai/docs/models/smolvlm2.html - 2026-03-19T12:33:45.408Z + 2026-03-20T03:12:41.335Z https://docs.axolotl.ai/docs/models/kimi-linear.html - 2026-03-19T12:33:45.397Z + 2026-03-20T03:12:41.319Z https://docs.axolotl.ai/docs/models/llama-2.html - 2026-03-19T12:33:45.405Z + 2026-03-20T03:12:41.330Z https://docs.axolotl.ai/docs/models/qwen3-next.html - 2026-03-19T12:33:45.405Z + 2026-03-20T03:12:41.330Z https://docs.axolotl.ai/docs/models/gpt-oss.html - 2026-03-19T12:33:45.407Z + 2026-03-20T03:12:41.332Z https://docs.axolotl.ai/docs/models/mimo.html - 2026-03-19T12:33:45.397Z + 2026-03-20T03:12:41.321Z https://docs.axolotl.ai/docs/models/magistral/vision.html - 2026-03-19T12:33:45.403Z + 2026-03-20T03:12:41.326Z https://docs.axolotl.ai/docs/models/internvl3_5.html - 2026-03-19T12:33:45.398Z + 2026-03-20T03:12:41.321Z https://docs.axolotl.ai/docs/models/orpheus.html - 2026-03-19T12:33:45.410Z + 2026-03-20T03:12:41.362Z https://docs.axolotl.ai/docs/models/seed-oss.html - 2026-03-19T12:33:45.407Z + 2026-03-20T03:12:41.333Z https://docs.axolotl.ai/docs/models/granite4.html - 2026-03-19T12:33:45.408Z + 2026-03-20T03:12:41.335Z https://docs.axolotl.ai/docs/models/ministral3/think.html - 2026-03-19T12:33:45.400Z + 2026-03-20T03:12:41.324Z https://docs.axolotl.ai/docs/installation.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/docs/expert_quantization.html - 2026-03-19T12:29:57.758Z + 2026-03-20T03:08:46.036Z https://docs.axolotl.ai/docs/inference.html - 2026-03-19T12:29:57.761Z + 2026-03-20T03:08:46.040Z https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2026-03-19T12:29:57.789Z + 2026-03-20T03:08:46.073Z https://docs.axolotl.ai/FAQS.html - 2026-03-19T12:29:57.756Z + 2026-03-20T03:08:46.032Z