diff --git a/.nojekyll b/.nojekyll
index 20883d18e..32cefbaa3 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-ecff14dc
\ No newline at end of file
+c9ec9f73
\ No newline at end of file
diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html
index b7b3348b9..11dfc0712 100644
--- a/docs/custom_integrations.html
+++ b/docs/custom_integrations.html
@@ -558,7 +558,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
-pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@78b2a45713a54c9bedf8b33f5e31cf07a1a57154"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@622068a"
 
 Usage
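For context, the pinned install command above only makes CCE available; the Usage section that this hunk's trailing context points at (unchanged by this diff, quoted verbatim from the page's search.json entry) is what actually enables the plugin in the Axolotl YAML config:

```yaml
# Enable the Cut Cross Entropy plugin in an Axolotl config
# (verbatim from the page's Usage section; unchanged by this diff).
plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
```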

diff --git a/docs/faq.html b/docs/faq.html
index 86e06d60c..04547718c 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -551,6 +551,14 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
 
 A: This is because you may be using preprocess CLI with pretraining_dataset: or skip_prepare_dataset: true respectively. Please use axolotl train CLI directly instead as these datasets are prepared on demand.
 
+Q: vLLM is not working with Axolotl
+
+A: We currently recommend torch 2.6.0 for use with vllm. Please ensure you use the right version. For Docker, please use the main-py3.11-cu124-2.6.0 tag.
+
+Q: FA2 2.8.0 undefined symbol runtime error on CUDA 12.4
+
+A: There seems to be a wheel issue with FA2 2.8.0 on CUDA 12.4. Try CUDA 12.6 instead or downgrade to FA2 2.7.4. Please refer to the upstream issue: https://github.com/Dao-AILab/flash-attention/issues/1717.
+
 Chat templates
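The two answers added in this hunk come down to concrete version pins. A minimal shell sketch of that remediation follows, assuming a pip-managed CUDA environment; the axolotlai/axolotl image repository name and the flash-attn 2.7.4.post1 specifier are assumptions on my part (only torch 2.6.0, FA2 2.7.4, and the main-py3.11-cu124-2.6.0 tag appear in the FAQ text):

```sh
# Pin torch to the version the FAQ recommends for vLLM use with Axolotl.
pip3 install "torch==2.6.0"

# Sidestep the FA2 2.8.0 wheel issue on CUDA 12.4 by staying on the 2.7.4 line
# (the .post1 suffix is an assumption; check PyPI for the wheel matching your CUDA).
pip3 install "flash-attn==2.7.4.post1"

# Alternatively, pull the recommended Docker tag; the "axolotlai/axolotl"
# repository name is an assumption, only the tag itself comes from the FAQ.
docker pull axolotlai/axolotl:main-py3.11-cu124-2.6.0
```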

diff --git a/search.json b/search.json index 80cf626e9..4934d6bb7 100644 --- a/search.json +++ b/search.json @@ -3064,7 +3064,7 @@ "href": "docs/custom_integrations.html#cut-cross-entropy", "title": "Custom Integrations", "section": "Cut Cross Entropy", - "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@78b2a45713a54c9bedf8b33f5e31cf07a1a57154\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\ncohere\ncohere2\ngemma\ngemma2\ngemma3\ngemma3_text\nglm\nglm4\nllama\nllama4\nllama4_text\nmistral\nmistral3\nmllama\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", + "text": "Cut Cross Entropy\nCut Cross Entropy (CCE) reduces VRAM usage through optimization on the cross-entropy operation during loss calculation.\nSee https://github.com/apple/ml-cross-entropy\n\nRequirements\n\nPyTorch 2.4.0 or higher\n\n\n\nInstallation\nRun the following command to install cut_cross_entropy[transformers] if you don’t have it already.\n\nIf you are in dev environment\n\npython scripts/cutcrossentropy_install.py | sh\n\nIf you are installing from pip\n\npip3 uninstall -y cut-cross-entropy && pip3 install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@622068a\"\n\n\nUsage\nplugins:\n - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin\n\n\nSupported Models\n\ncohere\ncohere2\ngemma\ngemma2\ngemma3\ngemma3_text\nglm\nglm4\nllama\nllama4\nllama4_text\nmistral\nmistral3\nmllama\nphi\nphi3\nphi4_multimodal\nqwen2\nqwen2_vl\nqwen2_moe\nqwen2_5_vl\nqwen3\nqwen3_moe\n\n\n\nCitation\n@article{wijmans2024cut,\n author = {Erik Wijmans and\n Brody Huval and\n Alexander Hertzberg and\n Vladlen Koltun and\n Philipp Kr\\\"ahenb\\\"uhl},\n title = {Cut Your Losses in Large-Vocabulary Language Models},\n journal = {arXiv},\n year = {2024},\n url = {https://arxiv.org/abs/2411.09009},\n}\nPlease see reference here", "crumbs": [ "Advanced Features", "Custom Integrations" @@ -3218,7 +3218,7 @@ "href": "docs/faq.html", "title": "FAQ", "section": "", - "text": "General\nQ: The trainer stopped and hasn’t progressed in several minutes.\n\nA: Usually an issue with the GPUs communicating with each other. 
See the NCCL doc\n\nQ: exitcode: -9\n\nA: This usually happens when you run out of system RAM.\n\nQ: exitcode: -7 while using deepspeed\n\nA: Try upgrading deepspeed w: pip install -U deepspeed\n\nQ: AttributeError: ‘DummyOptim’ object has no attribute ‘step’\nQ: ModuleNotFoundError: No module named ‘mpi4py’ using single GPU with deepspeed\n\nA: You may be using deepspeed with single gpu. Please remove the deepspeed: section in the yaml file or --deepspeed CLI flag.\n\nQ: The codes is stuck on saving preprocessed datasets.\n\nA: This is usually an issue with the GPU. This can be resolved through setting the os environment variable CUDA_VISIBLE_DEVICES=0. If you are on runpod, this is usually a pod issue. Starting a new pod should take care of it.\n\nQ: Received mismatch error on merge adapters / loading adapters between torch.Size of checkpoint and model.\n\nA: This is likely due to vocab size mismatch. By default, Axolotl expands the model’s embeddings if the tokenizer has more tokens than the model. Please use the axolotl merge-lora command to merge the adapters instead of using your own scripts.\n\n\nOn the other hand, if the model has more tokens than the tokenizer, Axolotl does not shrink the model’s embeddings unless shrink_embeddings: true is set in the config.\n\nQ: How to call Axolotl via custom python scripts?\n\nA: Since Axolotl is just Python, please see src/axolotl/cli/main.py on how each command is called.\n\nQ: How to know the value to use for fsdp_transformer_layer_cls_to_wrap?\n\nA: This is the class name of the transformer layer to wrap with FSDP. For example, for LlamaForCausalLM, the value is LlamaDecoderLayer. To find this for a specific model, check the model’s PreTrainedModel definition and look for _no_split_modules variable in the modeling_<model_name>.py file within transformers library.\n\nQ: ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as pad_token\n\nA: This is because the tokenizer does not have a padding token. Please add a padding token to the tokenizer via:\n\n\nspecial_tokens:\n # str. If you're not sure, set to same as `eos_token`.\n pad_token: \"...\"\n\nQ: IterableDataset error or KeyError: 'input_ids' when using preprocess CLI\n\nA: This is because you may be using preprocess CLI with pretraining_dataset: or skip_prepare_dataset: true respectively. Please use axolotl train CLI directly instead as these datasets are prepared on demand.\n\n\n\nChat templates\nQ: jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____\n\nA: This means that the property mapping for the stated attribute does not exist when building chat_template prompt. For example, if no attribute 'content', please check you have added the correct mapping for content under message_property_mappings.\n\nQ: Empty template generated for turn ___\n\nA: The content is empty for that turn.\n\nQ: Could not find content start/end boundary for turn __\n\nA: The specific turn’s start/end could not be detected. Please ensure you have set the eos_token following your chat_template. Otherwise, this could be a chat_template which doesn’t use proper boundaries for each turn (like system). On the rare occurrence, make sure your content is not [[dummy_message]]. Please let us know about this.\n\nQ: Content end boundary is before start boundary for turn ___\n\nA: This is an edge case which should not occur. 
Please create an Issue if this happens.\n\nQ: Content end boundary is the same as start boundary for turn ___. This is likely an empty turn.\n\nA: This is likely an empty turn.\n\nQ: The EOS token is incorrectly being masked or not being masked / EOS token __ not found in chat template.\n\nA: There can be two reasons:\n\n\n\nThis is because of the mismatch between tokenizer.eos_token and EOS token in template. Please make sure to set eos_token: under special_tokens: to the same EOS token as in template.\n\n\n\n\nThe EOS token is not in the template. Please check if your template is correct. As an example, phi_35 template does not use its dedicated EOS token <|endoftext|> at the end.\n\n\nQ: “chat_template choice is tokenizer_default but tokenizer’s chat_template is null. Please add a chat_template in tokenizer config”\n\nA: This is because the tokenizer does not have a chat template. Please add a chat template in the tokenizer config. See chat_template for more details.\n\nQ: The EOT token(s) are incorrectly being masked or not being masked / EOT token __ not found in chat template.\n\nA: There can be two reasons:\n\n\n\nThe EOT token is different from the EOS token and was not specified under eot_tokens:. Please set eot_tokens: to the same EOT token(s) as in template.\n\n\n\n\nThere is more than one EOT token per turn in the template. Please raise an issue with examples as we recognize this as an edge case.\n\n\nQ: EOT token encoding failed. Please check if the token is valid and can be encoded.\n\nA: There could be some issue with the tokenizer or unicode encoding. Please raise an issue with examples with the EOT token & tokenizer causing the issue.\n\nQ: EOT token __ is encoded as multiple tokens.\n\nA: This is because the EOT token is encoded as multiple tokens which can cause unexpected behavior. Please add it under tokens: or (recommended) override unused added_tokens via added_tokens_overrides:.\n\nQ: Conflict between train_on_eos and train_on_eot. eos_token is in eot_tokens and train_on_eos != train_on_eot\n\nA: This is because the EOS token is in the eot_tokens: while mismatch between train_on_eos: and train_on_eot:. This will cause one to override the other. Please ensure that train_on_eos: and train_on_eot: are the same or remove the EOS token from eot_tokens:.\n\nQ: If eot_tokens: is not provided, what happens?\n\nA: If eot_tokens: is not provided, the default behavior is the same as before. EOS tokens used to delimit turns are masked/unmasked depending on whether the turn is trainable.\n\n\nInternally, eot_tokens: tokenizer.eos_token and train_on_eot: train_on_eos (which defaults to turn). This transition helps clarify the naming and behavior of EOT/EOS tokens.\n\nQ: Data processing error: CAS service error\n\nA: Try disabling XET with export HF_HUB_DISABLE_XET=1\n\nQ: torch._inductor.exc.LoweringException: NoValidChoicesError: No choices to select, please consider adding ATEN into max_autotune_gemm_backends config (defined in torch/_inductor/config.py) to allow at least one choice.\n\nA: Depending on the version of torch, you may need to include this in your YAML:\n\n\nflex_attn_compile_kwargs:\n dynamic: false\n mode: max-autotune-no-cudagraphs", + "text": "General\nQ: The trainer stopped and hasn’t progressed in several minutes.\n\nA: Usually an issue with the GPUs communicating with each other. 
See the NCCL doc\n\nQ: exitcode: -9\n\nA: This usually happens when you run out of system RAM.\n\nQ: exitcode: -7 while using deepspeed\n\nA: Try upgrading deepspeed w: pip install -U deepspeed\n\nQ: AttributeError: ‘DummyOptim’ object has no attribute ‘step’\nQ: ModuleNotFoundError: No module named ‘mpi4py’ using single GPU with deepspeed\n\nA: You may be using deepspeed with single gpu. Please remove the deepspeed: section in the yaml file or --deepspeed CLI flag.\n\nQ: The codes is stuck on saving preprocessed datasets.\n\nA: This is usually an issue with the GPU. This can be resolved through setting the os environment variable CUDA_VISIBLE_DEVICES=0. If you are on runpod, this is usually a pod issue. Starting a new pod should take care of it.\n\nQ: Received mismatch error on merge adapters / loading adapters between torch.Size of checkpoint and model.\n\nA: This is likely due to vocab size mismatch. By default, Axolotl expands the model’s embeddings if the tokenizer has more tokens than the model. Please use the axolotl merge-lora command to merge the adapters instead of using your own scripts.\n\n\nOn the other hand, if the model has more tokens than the tokenizer, Axolotl does not shrink the model’s embeddings unless shrink_embeddings: true is set in the config.\n\nQ: How to call Axolotl via custom python scripts?\n\nA: Since Axolotl is just Python, please see src/axolotl/cli/main.py on how each command is called.\n\nQ: How to know the value to use for fsdp_transformer_layer_cls_to_wrap?\n\nA: This is the class name of the transformer layer to wrap with FSDP. For example, for LlamaForCausalLM, the value is LlamaDecoderLayer. To find this for a specific model, check the model’s PreTrainedModel definition and look for _no_split_modules variable in the modeling_<model_name>.py file within transformers library.\n\nQ: ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as pad_token\n\nA: This is because the tokenizer does not have a padding token. Please add a padding token to the tokenizer via:\n\n\nspecial_tokens:\n # str. If you're not sure, set to same as `eos_token`.\n pad_token: \"...\"\n\nQ: IterableDataset error or KeyError: 'input_ids' when using preprocess CLI\n\nA: This is because you may be using preprocess CLI with pretraining_dataset: or skip_prepare_dataset: true respectively. Please use axolotl train CLI directly instead as these datasets are prepared on demand.\n\nQ: vLLM is not working with Axolotl\n\nA: We currently recommend torch 2.6.0 for use with vllm. Please ensure you use the right version. For Docker, please use the main-py3.11-cu124-2.6.0 tag.\n\nQ: FA2 2.8.0 undefined symbol runtime error on CUDA 12.4\n\nA: There seems to be a wheel issue with FA2 2.8.0 on CUDA 12.4. Try CUDA 12.6 instead or downgrade to FA2 2.7.4. Please refer to the upstream issue: https://github.com/Dao-AILab/flash-attention/issues/1717.\n\n\n\nChat templates\nQ: jinja2.exceptions.UndefinedError: 'dict object' has no attribute 'content' / 'role' / ____\n\nA: This means that the property mapping for the stated attribute does not exist when building chat_template prompt. For example, if no attribute 'content', please check you have added the correct mapping for content under message_property_mappings.\n\nQ: Empty template generated for turn ___\n\nA: The content is empty for that turn.\n\nQ: Could not find content start/end boundary for turn __\n\nA: The specific turn’s start/end could not be detected. 
Please ensure you have set the eos_token following your chat_template. Otherwise, this could be a chat_template which doesn’t use proper boundaries for each turn (like system). On the rare occurrence, make sure your content is not [[dummy_message]]. Please let us know about this.\n\nQ: Content end boundary is before start boundary for turn ___\n\nA: This is an edge case which should not occur. Please create an Issue if this happens.\n\nQ: Content end boundary is the same as start boundary for turn ___. This is likely an empty turn.\n\nA: This is likely an empty turn.\n\nQ: The EOS token is incorrectly being masked or not being masked / EOS token __ not found in chat template.\n\nA: There can be two reasons:\n\n\n\nThis is because of the mismatch between tokenizer.eos_token and EOS token in template. Please make sure to set eos_token: under special_tokens: to the same EOS token as in template.\n\n\n\n\nThe EOS token is not in the template. Please check if your template is correct. As an example, phi_35 template does not use its dedicated EOS token <|endoftext|> at the end.\n\n\nQ: “chat_template choice is tokenizer_default but tokenizer’s chat_template is null. Please add a chat_template in tokenizer config”\n\nA: This is because the tokenizer does not have a chat template. Please add a chat template in the tokenizer config. See chat_template for more details.\n\nQ: The EOT token(s) are incorrectly being masked or not being masked / EOT token __ not found in chat template.\n\nA: There can be two reasons:\n\n\n\nThe EOT token is different from the EOS token and was not specified under eot_tokens:. Please set eot_tokens: to the same EOT token(s) as in template.\n\n\n\n\nThere is more than one EOT token per turn in the template. Please raise an issue with examples as we recognize this as an edge case.\n\n\nQ: EOT token encoding failed. Please check if the token is valid and can be encoded.\n\nA: There could be some issue with the tokenizer or unicode encoding. Please raise an issue with examples with the EOT token & tokenizer causing the issue.\n\nQ: EOT token __ is encoded as multiple tokens.\n\nA: This is because the EOT token is encoded as multiple tokens which can cause unexpected behavior. Please add it under tokens: or (recommended) override unused added_tokens via added_tokens_overrides:.\n\nQ: Conflict between train_on_eos and train_on_eot. eos_token is in eot_tokens and train_on_eos != train_on_eot\n\nA: This is because the EOS token is in the eot_tokens: while mismatch between train_on_eos: and train_on_eot:. This will cause one to override the other. Please ensure that train_on_eos: and train_on_eot: are the same or remove the EOS token from eot_tokens:.\n\nQ: If eot_tokens: is not provided, what happens?\n\nA: If eot_tokens: is not provided, the default behavior is the same as before. EOS tokens used to delimit turns are masked/unmasked depending on whether the turn is trainable.\n\n\nInternally, eot_tokens: tokenizer.eos_token and train_on_eot: train_on_eos (which defaults to turn). 
This transition helps clarify the naming and behavior of EOT/EOS tokens.\n\nQ: Data processing error: CAS service error\n\nA: Try disabling XET with export HF_HUB_DISABLE_XET=1\n\nQ: torch._inductor.exc.LoweringException: NoValidChoicesError: No choices to select, please consider adding ATEN into max_autotune_gemm_backends config (defined in torch/_inductor/config.py) to allow at least one choice.\n\nA: Depending on the version of torch, you may need to include this in your YAML:\n\n\nflex_attn_compile_kwargs:\n dynamic: false\n mode: max-autotune-no-cudagraphs", "crumbs": [ "Troubleshooting", "FAQ" diff --git a/sitemap.xml b/sitemap.xml index f25f981e0..5700680f7 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,758 +2,758 @@ https://docs.axolotl.ai/docs/unsloth.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.645Z https://docs.axolotl.ai/docs/dataset-formats/conversation.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/tokenized.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/mac.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/nccl.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/multi-node.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/docker.html - 2025-07-07T14:11:57.261Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/lr_groups.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/inference.html - 2025-07-07T14:11:57.263Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/cli.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/config-reference.html - 2025-07-07T14:15:12.923Z + 2025-07-07T18:17:00.170Z https://docs.axolotl.ai/docs/multi-gpu.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/debugging.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/multimodal.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/api/cli.sweeps.html - 2025-07-07T14:14:59.594Z + 2025-07-07T18:16:47.224Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html - 2025-07-07T14:14:59.917Z + 2025-07-07T18:16:47.552Z https://docs.axolotl.ai/docs/api/utils.schedulers.html - 2025-07-07T14:15:00.295Z + 2025-07-07T18:16:47.934Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html - 2025-07-07T14:15:00.112Z + 2025-07-07T18:16:47.749Z https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html - 2025-07-07T14:14:59.642Z + 2025-07-07T18:16:47.272Z https://docs.axolotl.ai/docs/api/kernels.geglu.html - 2025-07-07T14:15:00.066Z + 2025-07-07T18:16:47.702Z https://docs.axolotl.ai/docs/api/core.trainers.utils.html - 2025-07-07T14:14:59.714Z + 2025-07-07T18:16:47.344Z https://docs.axolotl.ai/docs/api/core.datasets.chat.html - 2025-07-07T14:14:59.447Z + 2025-07-07T18:16:47.076Z https://docs.axolotl.ai/docs/api/utils.schemas.peft.html - 2025-07-07T14:15:00.406Z + 2025-07-07T18:16:48.046Z https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html - 2025-07-07T14:15:00.173Z + 2025-07-07T18:16:47.811Z https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html - 2025-07-07T14:14:59.820Z + 2025-07-07T18:16:47.453Z 
https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html - 2025-07-07T14:14:59.949Z + 2025-07-07T18:16:47.584Z https://docs.axolotl.ai/docs/api/cli.cloud.base.html - 2025-07-07T14:14:59.636Z + 2025-07-07T18:16:47.266Z https://docs.axolotl.ai/docs/api/kernels.swiglu.html - 2025-07-07T14:15:00.077Z + 2025-07-07T18:16:47.713Z https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html - 2025-07-07T14:14:59.884Z + 2025-07-07T18:16:47.518Z https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html - 2025-07-07T14:14:59.973Z + 2025-07-07T18:16:47.608Z https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html - 2025-07-07T14:14:59.874Z + 2025-07-07T18:16:47.508Z https://docs.axolotl.ai/docs/api/kernels.utils.html - 2025-07-07T14:15:00.085Z + 2025-07-07T18:16:47.721Z https://docs.axolotl.ai/docs/api/common.datasets.html - 2025-07-07T14:15:00.616Z + 2025-07-07T18:16:48.258Z https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html - 2025-07-07T14:15:00.397Z + 2025-07-07T18:16:48.037Z https://docs.axolotl.ai/docs/api/core.builders.rl.html - 2025-07-07T14:14:59.402Z + 2025-07-07T18:16:47.031Z https://docs.axolotl.ai/docs/api/evaluate.html - 2025-07-07T14:14:59.306Z + 2025-07-07T18:16:46.935Z https://docs.axolotl.ai/docs/api/kernels.quantize.html - 2025-07-07T14:15:00.084Z + 2025-07-07T18:16:47.720Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html - 2025-07-07T14:15:00.110Z + 2025-07-07T18:16:47.748Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html - 2025-07-07T14:14:59.757Z + 2025-07-07T18:16:47.388Z https://docs.axolotl.ai/docs/api/integrations.base.html - 2025-07-07T14:15:00.576Z + 2025-07-07T18:16:48.218Z https://docs.axolotl.ai/docs/api/cli.merge_lora.html - 2025-07-07T14:14:59.568Z + 2025-07-07T18:16:47.197Z https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html - 2025-07-07T14:14:59.580Z + 2025-07-07T18:16:47.209Z https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html - 2025-07-07T14:15:00.190Z + 2025-07-07T18:16:47.828Z https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html - 2025-07-07T14:14:59.868Z + 2025-07-07T18:16:47.502Z https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html - 2025-07-07T14:15:00.645Z + 2025-07-07T18:16:48.287Z https://docs.axolotl.ai/docs/api/utils.data.sft.html - 2025-07-07T14:15:00.333Z + 2025-07-07T18:16:47.973Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html - 2025-07-07T14:14:59.835Z + 2025-07-07T18:16:47.469Z https://docs.axolotl.ai/docs/api/integrations.liger.args.html - 2025-07-07T14:15:00.591Z + 2025-07-07T18:16:48.233Z https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html - 2025-07-07T14:15:00.126Z + 2025-07-07T18:16:47.764Z https://docs.axolotl.ai/docs/api/cli.vllm_serve.html - 2025-07-07T14:14:59.632Z + 2025-07-07T18:16:47.262Z https://docs.axolotl.ai/docs/api/monkeypatch.utils.html - 2025-07-07T14:15:00.172Z + 2025-07-07T18:16:47.810Z https://docs.axolotl.ai/docs/api/loaders.patch_manager.html - 2025-07-07T14:14:59.746Z + 2025-07-07T18:16:47.377Z https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html - 2025-07-07T14:15:00.426Z + 2025-07-07T18:16:48.067Z https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html - 2025-07-07T14:15:00.692Z + 2025-07-07T18:16:48.335Z https://docs.axolotl.ai/docs/api/cli.utils.html - 2025-07-07T14:14:59.625Z + 2025-07-07T18:16:47.255Z https://docs.axolotl.ai/docs/api/utils.schemas.config.html - 
2025-07-07T14:15:00.368Z + 2025-07-07T18:16:48.007Z https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html - 2025-07-07T14:14:59.879Z + 2025-07-07T18:16:47.514Z https://docs.axolotl.ai/docs/api/utils.distributed.html - 2025-07-07T14:15:00.314Z + 2025-07-07T18:16:47.953Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html - 2025-07-07T14:15:00.224Z + 2025-07-07T18:16:47.862Z https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html - 2025-07-07T14:15:00.184Z + 2025-07-07T18:16:47.822Z https://docs.axolotl.ai/docs/api/core.builders.base.html - 2025-07-07T14:14:59.389Z + 2025-07-07T18:16:47.018Z https://docs.axolotl.ai/docs/api/core.trainers.trl.html - 2025-07-07T14:14:59.673Z + 2025-07-07T18:16:47.303Z https://docs.axolotl.ai/docs/api/cli.evaluate.html - 2025-07-07T14:14:59.502Z + 2025-07-07T18:16:47.132Z https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html - 2025-07-07T14:15:00.325Z + 2025-07-07T18:16:47.964Z https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html - 2025-07-07T14:15:00.711Z + 2025-07-07T18:16:48.354Z https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html - 2025-07-07T14:14:59.690Z + 2025-07-07T18:16:47.320Z https://docs.axolotl.ai/docs/api/core.chat.format.shared.html - 2025-07-07T14:14:59.442Z + 2025-07-07T18:16:47.071Z https://docs.axolotl.ai/docs/api/monkeypatch.relora.html - 2025-07-07T14:15:00.134Z + 2025-07-07T18:16:47.772Z https://docs.axolotl.ai/docs/api/cli.config.html - 2025-07-07T14:14:59.545Z + 2025-07-07T18:16:47.175Z https://docs.axolotl.ai/docs/api/cli.preprocess.html - 2025-07-07T14:14:59.588Z + 2025-07-07T18:16:47.218Z https://docs.axolotl.ai/docs/api/core.trainers.base.html - 2025-07-07T14:14:59.657Z + 2025-07-07T18:16:47.287Z https://docs.axolotl.ai/docs/api/convert.html - 2025-07-07T14:14:59.330Z + 2025-07-07T18:16:46.959Z https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html - 2025-07-07T14:14:59.901Z + 2025-07-07T18:16:47.536Z https://docs.axolotl.ai/docs/api/utils.schemas.trl.html - 2025-07-07T14:15:00.409Z + 2025-07-07T18:16:48.049Z https://docs.axolotl.ai/docs/api/cli.args.html - 2025-07-07T14:14:59.521Z + 2025-07-07T18:16:47.151Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html - 2025-07-07T14:14:59.907Z + 2025-07-07T18:16:47.541Z https://docs.axolotl.ai/docs/api/loaders.constants.html - 2025-07-07T14:14:59.748Z + 2025-07-07T18:16:47.379Z https://docs.axolotl.ai/docs/api/logging_config.html - 2025-07-07T14:14:59.383Z + 2025-07-07T18:16:47.012Z https://docs.axolotl.ai/docs/api/cli.inference.html - 2025-07-07T14:14:59.560Z + 2025-07-07T18:16:47.189Z https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html - 2025-07-07T14:14:59.786Z + 2025-07-07T18:16:47.418Z https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html - 2025-07-07T14:15:00.598Z + 2025-07-07T18:16:48.240Z https://docs.axolotl.ai/docs/api/utils.schemas.training.html - 2025-07-07T14:15:00.380Z + 2025-07-07T18:16:48.019Z https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html - 2025-07-07T14:14:59.894Z + 2025-07-07T18:16:47.529Z https://docs.axolotl.ai/docs/api/utils.freeze.html - 2025-07-07T14:15:00.254Z + 2025-07-07T18:16:47.892Z https://docs.axolotl.ai/docs/api/loaders.tokenizer.html - 2025-07-07T14:14:59.731Z + 2025-07-07T18:16:47.362Z https://docs.axolotl.ai/docs/api/utils.bench.html - 2025-07-07T14:15:00.246Z + 2025-07-07T18:16:47.884Z https://docs.axolotl.ai/docs/api/utils.quantization.html - 2025-07-07T14:15:00.354Z + 
2025-07-07T18:16:47.994Z https://docs.axolotl.ai/docs/batch_vs_grad.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.640Z https://docs.axolotl.ai/docs/input_output.html - 2025-07-07T14:11:57.263Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/sequence_parallelism.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.645Z https://docs.axolotl.ai/docs/reward_modelling.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/index.html - 2025-07-07T14:11:57.278Z + 2025-07-07T18:13:45.659Z https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html - 2025-07-07T14:11:57.282Z + 2025-07-07T18:13:45.663Z https://docs.axolotl.ai/FAQS.html - 2025-07-07T14:11:57.258Z + 2025-07-07T18:13:45.639Z https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html - 2025-07-07T14:11:57.283Z + 2025-07-07T18:13:45.663Z https://docs.axolotl.ai/TODO.html - 2025-07-07T14:11:57.258Z + 2025-07-07T18:13:45.639Z https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html - 2025-07-07T14:11:57.266Z + 2025-07-07T18:13:45.646Z https://docs.axolotl.ai/docs/torchao.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.645Z https://docs.axolotl.ai/docs/ray-integration.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/quantize.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/qat.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/api/utils.lora.html - 2025-07-07T14:15:00.237Z + 2025-07-07T18:16:47.875Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html - 2025-07-07T14:14:59.847Z + 2025-07-07T18:16:47.481Z https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html - 2025-07-07T14:15:00.180Z + 2025-07-07T18:16:47.819Z https://docs.axolotl.ai/docs/api/utils.collators.core.html - 2025-07-07T14:15:00.618Z + 2025-07-07T18:16:48.260Z https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html - 2025-07-07T14:14:59.891Z + 2025-07-07T18:16:47.525Z https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html - 2025-07-07T14:15:00.696Z + 2025-07-07T18:16:48.338Z https://docs.axolotl.ai/docs/api/utils.data.pretraining.html - 2025-07-07T14:15:00.327Z + 2025-07-07T18:16:47.966Z https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html - 2025-07-07T14:15:00.697Z + 2025-07-07T18:16:48.340Z https://docs.axolotl.ai/docs/api/utils.trainer.html - 2025-07-07T14:15:00.270Z + 2025-07-07T18:16:47.909Z https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html - 2025-07-07T14:15:00.580Z + 2025-07-07T18:16:48.221Z https://docs.axolotl.ai/docs/api/utils.schemas.model.html - 2025-07-07T14:15:00.374Z + 2025-07-07T18:16:48.014Z https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html - 2025-07-07T14:15:00.193Z + 2025-07-07T18:16:47.831Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html - 2025-07-07T14:14:59.929Z + 2025-07-07T18:16:47.563Z https://docs.axolotl.ai/docs/api/datasets.html - 2025-07-07T14:14:59.317Z + 2025-07-07T18:16:46.946Z https://docs.axolotl.ai/docs/api/utils.schemas.enums.html - 2025-07-07T14:15:00.437Z + 2025-07-07T18:16:48.077Z https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html - 2025-07-07T14:15:00.588Z + 2025-07-07T18:16:48.230Z https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html - 2025-07-07T14:15:00.164Z + 2025-07-07T18:16:47.802Z https://docs.axolotl.ai/docs/api/utils.collators.batching.html - 
2025-07-07T14:15:00.637Z + 2025-07-07T18:16:48.279Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html - 2025-07-07T14:14:59.712Z + 2025-07-07T18:16:47.343Z https://docs.axolotl.ai/docs/api/prompt_strategies.base.html - 2025-07-07T14:14:59.788Z + 2025-07-07T18:16:47.420Z https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html - 2025-07-07T14:15:00.128Z + 2025-07-07T18:16:47.765Z https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html - 2025-07-07T14:14:59.969Z + 2025-07-07T18:16:47.604Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html - 2025-07-07T14:14:59.932Z + 2025-07-07T18:16:47.566Z https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html - 2025-07-07T14:14:59.439Z + 2025-07-07T18:16:47.068Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html - 2025-07-07T14:14:59.763Z + 2025-07-07T18:16:47.394Z https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html - 2025-07-07T14:15:00.242Z + 2025-07-07T18:16:47.881Z https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html - 2025-07-07T14:14:59.948Z + 2025-07-07T18:16:47.582Z https://docs.axolotl.ai/docs/api/utils.tokenization.html - 2025-07-07T14:15:00.231Z + 2025-07-07T18:16:47.869Z https://docs.axolotl.ai/docs/api/loaders.model.html - 2025-07-07T14:14:59.723Z + 2025-07-07T18:16:47.354Z https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html - 2025-07-07T14:15:00.701Z + 2025-07-07T18:16:48.343Z https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html - 2025-07-07T14:14:59.700Z + 2025-07-07T18:16:47.331Z https://docs.axolotl.ai/docs/api/cli.main.html - 2025-07-07T14:14:59.485Z + 2025-07-07T18:16:47.115Z https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html - 2025-07-07T14:15:00.704Z + 2025-07-07T18:16:48.347Z https://docs.axolotl.ai/docs/api/utils.chat_templates.html - 2025-07-07T14:15:00.232Z + 2025-07-07T18:16:47.870Z https://docs.axolotl.ai/docs/api/utils.schemas.utils.html - 2025-07-07T14:15:00.442Z + 2025-07-07T18:16:48.083Z https://docs.axolotl.ai/docs/api/common.architectures.html - 2025-07-07T14:15:00.599Z + 2025-07-07T18:16:48.241Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html - 2025-07-07T14:15:00.136Z + 2025-07-07T18:16:47.773Z https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html - 2025-07-07T14:14:59.834Z + 2025-07-07T18:16:47.467Z https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html - 2025-07-07T14:15:00.686Z + 2025-07-07T18:16:48.328Z https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html - 2025-07-07T14:15:00.581Z + 2025-07-07T18:16:48.222Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html - 2025-07-07T14:14:59.927Z + 2025-07-07T18:16:47.562Z https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html - 2025-07-07T14:15:00.195Z + 2025-07-07T18:16:47.833Z https://docs.axolotl.ai/docs/api/train.html - 2025-07-07T14:14:59.296Z + 2025-07-07T18:16:46.924Z https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html - 2025-07-07T14:15:00.175Z + 2025-07-07T18:16:47.813Z https://docs.axolotl.ai/docs/api/index.html - 2025-07-07T14:14:59.234Z + 2025-07-07T18:16:46.862Z https://docs.axolotl.ai/docs/api/loaders.adapter.html - 2025-07-07T14:14:59.738Z + 2025-07-07T18:16:47.369Z https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html - 2025-07-07T14:15:00.414Z + 2025-07-07T18:16:48.055Z https://docs.axolotl.ai/docs/api/kernels.lora.html - 2025-07-07T14:15:00.056Z + 2025-07-07T18:16:47.692Z 
https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html - 2025-07-07T14:14:59.940Z + 2025-07-07T18:16:47.574Z https://docs.axolotl.ai/docs/api/cli.checks.html - 2025-07-07T14:14:59.528Z + 2025-07-07T18:16:47.158Z https://docs.axolotl.ai/docs/api/cli.quantize.html - 2025-07-07T14:14:59.647Z + 2025-07-07T18:16:47.277Z https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html - 2025-07-07T14:15:00.595Z + 2025-07-07T18:16:48.236Z https://docs.axolotl.ai/docs/api/core.chat.messages.html - 2025-07-07T14:14:59.437Z + 2025-07-07T18:16:47.066Z https://docs.axolotl.ai/docs/api/core.builders.causal.html - 2025-07-07T14:14:59.394Z + 2025-07-07T18:16:47.023Z https://docs.axolotl.ai/docs/api/core.trainers.relora.html - 2025-07-07T14:14:59.683Z + 2025-07-07T18:16:47.313Z https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html - 2025-07-07T14:15:00.617Z + 2025-07-07T18:16:48.259Z https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html - 2025-07-07T14:15:00.198Z + 2025-07-07T18:16:47.836Z https://docs.axolotl.ai/docs/api/core.trainers.mamba.html - 2025-07-07T14:14:59.678Z + 2025-07-07T18:16:47.309Z https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html - 2025-07-07T14:14:59.454Z + 2025-07-07T18:16:47.083Z https://docs.axolotl.ai/docs/api/loaders.processor.html - 2025-07-07T14:14:59.733Z + 2025-07-07T18:16:47.364Z https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html - 2025-07-07T14:14:59.440Z + 2025-07-07T18:16:47.069Z https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html - 2025-07-07T14:14:59.905Z + 2025-07-07T18:16:47.540Z https://docs.axolotl.ai/docs/api/cli.train.html - 2025-07-07T14:14:59.493Z + 2025-07-07T18:16:47.123Z https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html - 2025-07-07T14:14:59.753Z + 2025-07-07T18:16:47.384Z https://docs.axolotl.ai/docs/api/utils.collators.mamba.html - 2025-07-07T14:15:00.641Z + 2025-07-07T18:16:48.282Z https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html - 2025-07-07T14:15:00.192Z + 2025-07-07T18:16:47.830Z https://docs.axolotl.ai/docs/api/utils.dict.html - 2025-07-07T14:15:00.318Z + 2025-07-07T18:16:47.957Z https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html - 2025-07-07T14:14:59.855Z + 2025-07-07T18:16:47.489Z https://docs.axolotl.ai/docs/api/core.training_args.html - 2025-07-07T14:14:59.414Z + 2025-07-07T18:16:47.043Z https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html - 2025-07-07T14:14:59.930Z + 2025-07-07T18:16:47.565Z https://docs.axolotl.ai/docs/api/prompt_tokenizers.html - 2025-07-07T14:14:59.373Z + 2025-07-07T18:16:47.002Z https://docs.axolotl.ai/docs/api/common.const.html - 2025-07-07T14:15:00.601Z + 2025-07-07T18:16:48.243Z https://docs.axolotl.ai/docs/fsdp_qlora.html - 2025-07-07T14:11:57.261Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/custom_integrations.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/getting-started.html - 2025-07-07T14:11:57.261Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/faq.html - 2025-07-07T14:11:57.261Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/lora_optims.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/rlhf.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.645Z https://docs.axolotl.ai/docs/amd_hpc.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.640Z https://docs.axolotl.ai/docs/installation.html - 2025-07-07T14:11:57.264Z + 
2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/multipack.html - 2025-07-07T14:11:57.264Z + 2025-07-07T18:13:45.644Z https://docs.axolotl.ai/docs/dataset_preprocessing.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset_loading.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/template_free.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/index.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z https://docs.axolotl.ai/docs/dataset-formats/pretraining.html - 2025-07-07T14:11:57.260Z + 2025-07-07T18:13:45.641Z