diff --git a/.nojekyll b/.nojekyll
index 8223b6d0f..ab8a0397c 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-5e7d0afb
\ No newline at end of file
+3cfe7963
\ No newline at end of file
diff --git a/docs/api/index.html b/docs/api/index.html
index c9e2b804d..a1aed0f36 100644
--- a/docs/api/index.html
+++ b/docs/api/index.html
@@ -1004,7 +1004,7 @@ ul.task-list li input[type="checkbox"] {
 </tr>
 <tr class="even">
 <td><a href="../../docs/api/utils.collators.batching.html#axolotl.utils.collators.batching">utils.collators.batching</a></td>
-<td>Data collators for axolotl to pad labels and position_ids for packed sequences. Also</td>
+<td>Data collators for axolotl to pad labels and position_ids for packed sequences</td>
 </tr>
 <tr class="odd">
 <td><a href="../../docs/api/utils.collators.mamba.html#axolotl.utils.collators.mamba">utils.collators.mamba</a></td>
diff --git a/docs/api/utils.collators.batching.html b/docs/api/utils.collators.batching.html
index 0b6e518e0..acfde0de6 100644
--- a/docs/api/utils.collators.batching.html
+++ b/docs/api/utils.collators.batching.html
@@ -466,8 +466,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
 <section id="axolotl.utils.collators.batching" class="level1">
 <h1>utils.collators.batching</h1>
 <p><code>utils.collators.batching</code></p>
-<p>Data collators for axolotl to pad labels and position_ids for packed sequences. Also
-includes logic for handling sequence parallelism collation.</p>
+<p>Data collators for axolotl to pad labels and position_ids for packed sequences</p>
 <section id="classes" class="level2">
 <h2 class="anchored" data-anchor-id="classes">Classes</h2>
 <table class="caption-top table">
@@ -508,9 +507,7 @@ includes logic for handling sequence parallelism collation.</p>
 <span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a>    label_pad_token_id<span class="op">=-</span><span class="dv">100</span>,</span>
 <span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>    position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
 <span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>    return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
-<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>    sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
-<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>    ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Collator for multipack specific to the using the BatchSampler</p>
 </section>
 <section id="axolotl.utils.collators.batching.DataCollatorForSeq2Seq" class="level3">
@@ -525,17 +522,15 @@ includes logic for handling sequence parallelism collation.</p>
 <span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a>    label_pad_token_id<span class="op">=-</span><span class="dv">100</span>,</span>
 <span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a>    position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
 <span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a>    return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
-<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a>    sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
-<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a>    ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Data collator that will dynamically pad the inputs received, as well as the labels and position_ids</p>
 <section id="parameters" class="level4 doc-section doc-section-parameters">
 <h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h4>
 <table class="caption-top table">
 <colgroup>
-<col style="width: 4%">
+<col style="width: 3%">
 <col style="width: 12%">
-<col style="width: 81%">
+<col style="width: 82%">
 <col style="width: 1%">
 </colgroup>
 <thead>
@@ -589,111 +584,33 @@ includes logic for handling sequence parallelism collation.</p>
 <td>The type of Tensor to return. Allowable values are “np”, “pt” and “tf”.</td>
 <td><code>'pt'</code></td>
 </tr>
-<tr class="even">
-<td>sequence_parallel_degree</td>
-<td><code>int</code></td>
-<td>The degree of sequence parallelism. Default to 1 for no sequence parallelism.</td>
-<td><code>1</code></td>
-</tr>
 </tbody>
 </table>
 </section>
-<section id="methods" class="level4">
-<h4 class="anchored" data-anchor-id="methods">Methods</h4>
-<table class="caption-top table">
-<thead>
-<tr class="header">
-<th>Name</th>
-<th>Description</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td><a href="#axolotl.utils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism">apply_sequence_parallelism</a></td>
-<td>Apply sequence parallelism slicing to a batch.</td>
-</tr>
-</tbody>
-</table>
-<section id="axolotl.utils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism" class="level5">
-<h5 class="anchored" data-anchor-id="axolotl.utils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism">apply_sequence_parallelism</h5>
-<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>utils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism(</span>
-<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>    batch,</span>
-<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
-<p>Apply sequence parallelism slicing to a batch.</p>
-<section id="parameters-1" class="level6 doc-section doc-section-parameters">
-<h6 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters-1">Parameters</h6>
-<table class="caption-top table">
-<colgroup>
-<col style="width: 9%">
-<col style="width: 31%">
-<col style="width: 45%">
-<col style="width: 13%">
-</colgroup>
-<thead>
-<tr class="header">
-<th>Name</th>
-<th>Type</th>
-<th>Description</th>
-<th>Default</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td>batch</td>
-<td>dict[str, torch.Tensor]</td>
-<td>Batch dictionary from parent collator.</td>
-<td><em>required</em></td>
-</tr>
-</tbody>
-</table>
-</section>
-<section id="returns" class="level6 doc-section doc-section-returns">
-<h6 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h6>
-<table class="caption-top table">
-<thead>
-<tr class="header">
-<th>Name</th>
-<th>Type</th>
-<th>Description</th>
-</tr>
-</thead>
-<tbody>
-<tr class="odd">
-<td></td>
-<td>torch.Tensor</td>
-<td>Sliced batch dictionary.</td>
-</tr>
-</tbody>
-</table>
-</section>
-</section>
-</section>
 </section>
 <section id="axolotl.utils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq" class="level3">
 <h3 class="anchored" data-anchor-id="axolotl.utils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq">PretrainingBatchSamplerDataCollatorForSeq2Seq</h3>
-<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>utils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(</span>
-<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>    <span class="va">self</span>,</span>
-<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>    <span class="op">*</span>args,</span>
-<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>    multipack_attn<span class="op">=</span><span class="va">True</span>,</span>
-<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>    <span class="op">**</span>kwargs,</span>
-<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>utils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(</span>
+<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>    <span class="va">self</span>,</span>
+<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>    <span class="op">*</span>args,</span>
+<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a>    multipack_attn<span class="op">=</span><span class="va">True</span>,</span>
+<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>    <span class="op">**</span>kwargs,</span>
+<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Collator for multipack specific to the using the BatchSampler</p>
 </section>
 <section id="axolotl.utils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq" class="level3">
 <h3 class="anchored" data-anchor-id="axolotl.utils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq">V2BatchSamplerDataCollatorForSeq2Seq</h3>
-<div class="sourceCode" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>utils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(</span>
-<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>    <span class="va">self</span>,</span>
-<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>    tokenizer,</span>
-<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>    model<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>    padding<span class="op">=</span><span class="va">True</span>,</span>
-<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a>    max_length<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a>    pad_to_multiple_of<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a>    label_pad_token_id<span class="op">=-</span><span class="dv">100</span>,</span>
-<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a>    position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
-<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a>    return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
-<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a>    sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
-<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a>    ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
-<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
+<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>utils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(</span>
+<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>    <span class="va">self</span>,</span>
+<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a>    tokenizer,</span>
+<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>    model<span class="op">=</span><span class="va">None</span>,</span>
+<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>    padding<span class="op">=</span><span class="va">True</span>,</span>
+<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a>    max_length<span class="op">=</span><span class="va">None</span>,</span>
+<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a>    pad_to_multiple_of<span class="op">=</span><span class="va">None</span>,</span>
+<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a>    label_pad_token_id<span class="op">=-</span><span class="dv">100</span>,</span>
+<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a>    position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
+<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a>    return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
+<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
 <p>Collator for multipack specific to the using the BatchSampler</p>
 
 
diff --git a/search.json b/search.json
index 3be5705ef..6be7782b5 100644
--- a/search.json
+++ b/search.json
@@ -659,7 +659,7 @@
     "href": "docs/api/index.html",
     "title": "API Reference",
     "section": "",
-    "text": "Core functionality for training\n\n\n\ntrain\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\nevaluate\nModule for evaluating models.\n\n\ndatasets\nModule containing Dataset functionality\n\n\nconvert\nModule containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes\n\n\nprompt_tokenizers\nModule containing PromptTokenizingStrategy and Prompter classes\n\n\nlogging_config\nCommon logging module for axolotl\n\n\ncore.trainer_builder\nBuilder for the training args and trainer\n\n\ncore.training_args\nextra axolotl specific training args\n\n\ncore.chat.messages\ninternal message representations of chat messages\n\n\ncore.chat.format.chatml\nChatML transformation functions for MessageContents\n\n\ncore.chat.format.llama3x\nLlama 3.x chat formatting functions for MessageContents\n\n\ncore.chat.format.shared\nshared functions for format transforms\n\n\ncore.datasets.chat\nchat dataset module\n\n\ncore.datasets.transforms.chat_builder\nThis module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.\n\n\n\n\n\n\nCommand-line interface\n\n\n\ncli.main\nClick CLI definitions for various axolotl commands.\n\n\ncli.train\nCLI to run training on a model.\n\n\ncli.evaluate\nCLI to run evaluation on a model.\n\n\ncli.args\nModule for axolotl CLI command arguments.\n\n\ncli.checks\nVarious checks for Axolotl CLI.\n\n\ncli.config\nConfiguration loading and processing.\n\n\ncli.inference\nCLI to run inference on a trained model.\n\n\ncli.merge_lora\nCLI to merge a trained LoRA into a base model.\n\n\ncli.merge_sharded_fsdp_weights\nCLI to merge sharded FSDP model checkpoints into a single combined checkpoint.\n\n\ncli.preprocess\nCLI to run preprocessing of a dataset.\n\n\ncli.sweeps\nUtilities for handling sweeps over configs for axolotl train CLI command\n\n\ncli.utils\nUtility methods for axolotl CLI.\n\n\ncli.vllm_serve\nCLI to start the vllm server for online RL\n\n\ncli.cloud.base\nbase class for cloud platforms from cli\n\n\ncli.cloud.modal_\nModal Cloud support from CLI\n\n\n\n\n\n\nTraining implementations\n\n\n\ncore.trainers.base\nModule for customized trainers\n\n\ncore.trainers.trl\nModule for TRL PPO trainer\n\n\ncore.trainers.dpo.trainer\nDPO trainer for axolotl\n\n\ncore.trainers.grpo.trainer\nAxolotl GRPO trainer\n\n\n\n\n\n\nPrompt formatting strategies\n\n\n\nprompt_strategies.base\nmodule for base dataset transform strategies\n\n\nprompt_strategies.chat_template\nHF Chat Templates prompt strategy\n\n\nprompt_strategies.alpaca_chat\nModule for Alpaca prompt strategy classes\n\n\nprompt_strategies.alpaca_instruct\nModule loading the AlpacaInstructPromptTokenizingStrategy class\n\n\nprompt_strategies.alpaca_w_system\nPrompt strategies loader for alpaca instruction datasets with system prompts\n\n\nprompt_strategies.user_defined\nUser Defined prompts with configuration from the YML config\n\n\nprompt_strategies.llama2_chat\nPrompt Strategy for finetuning Llama2 chat models\n\n\nprompt_strategies.completion\nBasic completion text\n\n\nprompt_strategies.input_output\nModule for plain input/output prompt pairs\n\n\nprompt_strategies.stepwise_supervised\nModule for stepwise datasets, typically including a prompt and reasoning traces,\n\n\nprompt_strategies.metharme\nModule containing the MetharmenPromptTokenizingStrategy and MetharmePrompter class\n\n\nprompt_strategies.orcamini\nPrompt Strategy for finetuning Orca Mini (v2) models\n\n\nprompt_strategies.pygmalion\nModule containing the PygmalionPromptTokenizingStrategy and PygmalionPrompter class\n\n\nprompt_strategies.messages.chat\nChat dataset wrapping strategy for new internal messages representations\n\n\nprompt_strategies.dpo.chat_template\nDPO prompt strategies for using tokenizer chat templates.\n\n\nprompt_strategies.dpo.llama3\nDPO strategies for llama-3 chat template\n\n\nprompt_strategies.dpo.chatml\nDPO strategies for chatml\n\n\nprompt_strategies.dpo.zephyr\nDPO strategies for zephyr\n\n\nprompt_strategies.dpo.user_defined\nUser-defined DPO strategies\n\n\nprompt_strategies.dpo.passthrough\nDPO prompt strategies passthrough/zero-processing strategy\n\n\nprompt_strategies.kto.llama3\nKTO strategies for llama-3 chat template\n\n\nprompt_strategies.kto.chatml\nKTO strategies for chatml\n\n\nprompt_strategies.kto.user_defined\nUser-defined KTO strategies\n\n\nprompt_strategies.orpo.chat_template\nchatml prompt tokenization strategy for ORPO\n\n\nprompt_strategies.bradley_terry.llama3\nchatml transforms for datasets with system, input, chosen, rejected to match llama3 chat template\n\n\n\n\n\n\nLow-level performance optimizations\n\n\n\nkernels.lora\nModule for definition of Low-Rank Adaptation (LoRA) Triton kernels.\n\n\nkernels.geglu\nModule for definition of GEGLU Triton kernels.\n\n\nkernels.swiglu\nModule for definition of SwiGLU Triton kernels.\n\n\nkernels.quantize\nDequantization utilities for bitsandbytes integration.\n\n\nkernels.utils\nUtilities for axolotl.kernels submodules.\n\n\n\n\n\n\nRuntime patches for model optimizations\n\n\n\nmonkeypatch.llama_attn_hijack_flash\nFlash attention monkey patch for llama model\n\n\nmonkeypatch.llama_attn_hijack_xformers\nDirectly copied the code from https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/modules/llama_attn_hijack.py and made some adjustments\n\n\nmonkeypatch.mistral_attn_hijack_flash\nFlash attention monkey patch for mistral model\n\n\nmonkeypatch.multipack\nmultipack patching for v2 of sample packing\n\n\nmonkeypatch.relora\nImplements the ReLoRA training procedure from https://arxiv.org/abs/2307.05695, minus the initial full fine-tune.\n\n\nmonkeypatch.llama_expand_mask\nexpands the binary attention mask per 3.2.2 of https://arxiv.org/pdf/2107.02027.pdf\n\n\nmonkeypatch.lora_kernels\nModule for patching custom LoRA Triton kernels and torch.autograd functions.\n\n\nmonkeypatch.utils\nShared utils for the monkeypatches\n\n\nmonkeypatch.btlm_attn_hijack_flash\nFlash attention monkey patch for cerebras btlm model\n\n\nmonkeypatch.llama_patch_multipack\nPatched LlamaAttention to use torch.nn.functional.scaled_dot_product_attention\n\n\nmonkeypatch.stablelm_attn_hijack_flash\nPyTorch StableLM Epoch model.\n\n\nmonkeypatch.trainer_fsdp_optim\nfix for FSDP optimizer save in trainer w 4.47.0\n\n\nmonkeypatch.transformers_fa_utils\nsee https://github.com/huggingface/transformers/pull/35834\n\n\nmonkeypatch.unsloth_\nmodule for patching with unsloth optimizations\n\n\nmonkeypatch.attention.mllama\nMonkeypatch for Vision Llama for FA2 support\n\n\nmonkeypatch.data.batch_dataset_fetcher\nmonkey patches for the dataset fetcher to handle batches of packed indexes\n\n\nmonkeypatch.mixtral\nPatches to support multipack for mixtral\n\n\n\n\n\n\nUtility functions\n\n\n\nutils.models\nModule for models and model loading\n\n\nutils.tokenization\nModule for tokenization utilities\n\n\nutils.chat_templates\nThis module provides functionality for selecting chat templates based on user choices.\n\n\nutils.lora\nmodule to get the state dict of a merged lora model\n\n\nutils.lora_embeddings\nhelpers for lora embeddings\n\n\nutils.model_shard_quant\nmodule to handle loading model on cpu/meta device for FSDP\n\n\nutils.bench\nBenchmarking and measurement utilities\n\n\nutils.freeze\nmodule to freeze/unfreeze parameters by name\n\n\nutils.trainer\nModule containing the Trainer class and related functions\n\n\nutils.schedulers\nModule for custom LRScheduler class\n\n\nutils.distributed\nutility helpers for distributed checks\n\n\nutils.dict\nModule containing the DictDefault class\n\n\nutils.optimizers.adopt\nCopied from https://github.com/iShohei220/adopt\n\n\nutils.data.pretraining\ndata handling specific to pretraining\n\n\nutils.data.sft\ndata handling specific to SFT\n\n\nutils.gradient_checkpointing.unsloth\nUnsloth checkpointing\n\n\n\n\n\n\nPydantic data models for Axolotl config\n\n\n\nutils.schemas.config\nModule with Pydantic models for configuration.\n\n\nutils.schemas.model\nPydantic models for model input / output, etc. configuration\n\n\nutils.schemas.training\nPydantic models for training hyperparameters\n\n\nutils.schemas.datasets\nPydantic models for datasets-related configuration\n\n\nutils.schemas.peft\nPydantic models for PEFT-related configuration\n\n\nutils.schemas.trl\nPydantic models for TRL trainer configuration\n\n\nutils.schemas.multimodal\nPydantic models for multimodal-related configuration\n\n\nutils.schemas.integrations\nPydantic models for Axolotl integrations\n\n\nutils.schemas.enums\nEnums for Axolotl input config\n\n\nutils.schemas.utils\nUtilities for Axolotl Pydantic models\n\n\n\n\n\n\nThird-party integrations and extensions\n\n\n\nintegrations.base\nBase class for all plugins.\n\n\nintegrations.cut_cross_entropy.args\nModule for handling Cut Cross Entropy input arguments.\n\n\nintegrations.grokfast.optimizer\n\n\n\nintegrations.kd.trainer\nKD trainer\n\n\nintegrations.liger.args\nModule for handling LIGER input arguments.\n\n\nintegrations.lm_eval.args\nModule for handling lm eval harness input arguments.\n\n\nintegrations.spectrum.args\nModule for handling Spectrum input arguments.\n\n\n\n\n\n\nCommon utilities and shared functionality\n\n\n\ncommon.architectures\nCommon architecture specific constants\n\n\ncommon.const\nVarious shared constants\n\n\ncommon.datasets\nDataset loading utilities.\n\n\n\n\n\n\nCustom model implementations\n\n\n\nmodels.mamba.modeling_mamba\n\n\n\n\n\n\n\nData processing utilities\n\n\n\nutils.collators.core\nbasic shared collator constants\n\n\nutils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences. Also\n\n\nutils.collators.mamba\ncollators for Mamba\n\n\nutils.collators.mm_chat\nCollators for multi-modal chat messages and packing\n\n\nutils.samplers.multipack\nMultipack Batch Sampler\n\n\n\n\n\n\nTraining callbacks\n\n\n\nutils.callbacks.perplexity\ncallback to calculate perplexity as an evaluation metric.\n\n\nutils.callbacks.profiler\nHF Trainer callback for creating pytorch profiling snapshots\n\n\nutils.callbacks.lisa\nmodule for LISA\n\n\nutils.callbacks.mlflow_\nMLFlow module for trainer callbacks\n\n\nutils.callbacks.comet_\nComet module for trainer callbacks"
+    "text": "Core functionality for training\n\n\n\ntrain\nPrepare and train a model on a dataset. Can also infer from a model or merge lora\n\n\nevaluate\nModule for evaluating models.\n\n\ndatasets\nModule containing Dataset functionality\n\n\nconvert\nModule containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes\n\n\nprompt_tokenizers\nModule containing PromptTokenizingStrategy and Prompter classes\n\n\nlogging_config\nCommon logging module for axolotl\n\n\ncore.trainer_builder\nBuilder for the training args and trainer\n\n\ncore.training_args\nextra axolotl specific training args\n\n\ncore.chat.messages\ninternal message representations of chat messages\n\n\ncore.chat.format.chatml\nChatML transformation functions for MessageContents\n\n\ncore.chat.format.llama3x\nLlama 3.x chat formatting functions for MessageContents\n\n\ncore.chat.format.shared\nshared functions for format transforms\n\n\ncore.datasets.chat\nchat dataset module\n\n\ncore.datasets.transforms.chat_builder\nThis module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.\n\n\n\n\n\n\nCommand-line interface\n\n\n\ncli.main\nClick CLI definitions for various axolotl commands.\n\n\ncli.train\nCLI to run training on a model.\n\n\ncli.evaluate\nCLI to run evaluation on a model.\n\n\ncli.args\nModule for axolotl CLI command arguments.\n\n\ncli.checks\nVarious checks for Axolotl CLI.\n\n\ncli.config\nConfiguration loading and processing.\n\n\ncli.inference\nCLI to run inference on a trained model.\n\n\ncli.merge_lora\nCLI to merge a trained LoRA into a base model.\n\n\ncli.merge_sharded_fsdp_weights\nCLI to merge sharded FSDP model checkpoints into a single combined checkpoint.\n\n\ncli.preprocess\nCLI to run preprocessing of a dataset.\n\n\ncli.sweeps\nUtilities for handling sweeps over configs for axolotl train CLI command\n\n\ncli.utils\nUtility methods for axolotl CLI.\n\n\ncli.vllm_serve\nCLI to start the vllm server for online RL\n\n\ncli.cloud.base\nbase class for cloud platforms from cli\n\n\ncli.cloud.modal_\nModal Cloud support from CLI\n\n\n\n\n\n\nTraining implementations\n\n\n\ncore.trainers.base\nModule for customized trainers\n\n\ncore.trainers.trl\nModule for TRL PPO trainer\n\n\ncore.trainers.dpo.trainer\nDPO trainer for axolotl\n\n\ncore.trainers.grpo.trainer\nAxolotl GRPO trainer\n\n\n\n\n\n\nPrompt formatting strategies\n\n\n\nprompt_strategies.base\nmodule for base dataset transform strategies\n\n\nprompt_strategies.chat_template\nHF Chat Templates prompt strategy\n\n\nprompt_strategies.alpaca_chat\nModule for Alpaca prompt strategy classes\n\n\nprompt_strategies.alpaca_instruct\nModule loading the AlpacaInstructPromptTokenizingStrategy class\n\n\nprompt_strategies.alpaca_w_system\nPrompt strategies loader for alpaca instruction datasets with system prompts\n\n\nprompt_strategies.user_defined\nUser Defined prompts with configuration from the YML config\n\n\nprompt_strategies.llama2_chat\nPrompt Strategy for finetuning Llama2 chat models\n\n\nprompt_strategies.completion\nBasic completion text\n\n\nprompt_strategies.input_output\nModule for plain input/output prompt pairs\n\n\nprompt_strategies.stepwise_supervised\nModule for stepwise datasets, typically including a prompt and reasoning traces,\n\n\nprompt_strategies.metharme\nModule containing the MetharmenPromptTokenizingStrategy and MetharmePrompter class\n\n\nprompt_strategies.orcamini\nPrompt Strategy for finetuning Orca Mini (v2) models\n\n\nprompt_strategies.pygmalion\nModule containing the PygmalionPromptTokenizingStrategy and PygmalionPrompter class\n\n\nprompt_strategies.messages.chat\nChat dataset wrapping strategy for new internal messages representations\n\n\nprompt_strategies.dpo.chat_template\nDPO prompt strategies for using tokenizer chat templates.\n\n\nprompt_strategies.dpo.llama3\nDPO strategies for llama-3 chat template\n\n\nprompt_strategies.dpo.chatml\nDPO strategies for chatml\n\n\nprompt_strategies.dpo.zephyr\nDPO strategies for zephyr\n\n\nprompt_strategies.dpo.user_defined\nUser-defined DPO strategies\n\n\nprompt_strategies.dpo.passthrough\nDPO prompt strategies passthrough/zero-processing strategy\n\n\nprompt_strategies.kto.llama3\nKTO strategies for llama-3 chat template\n\n\nprompt_strategies.kto.chatml\nKTO strategies for chatml\n\n\nprompt_strategies.kto.user_defined\nUser-defined KTO strategies\n\n\nprompt_strategies.orpo.chat_template\nchatml prompt tokenization strategy for ORPO\n\n\nprompt_strategies.bradley_terry.llama3\nchatml transforms for datasets with system, input, chosen, rejected to match llama3 chat template\n\n\n\n\n\n\nLow-level performance optimizations\n\n\n\nkernels.lora\nModule for definition of Low-Rank Adaptation (LoRA) Triton kernels.\n\n\nkernels.geglu\nModule for definition of GEGLU Triton kernels.\n\n\nkernels.swiglu\nModule for definition of SwiGLU Triton kernels.\n\n\nkernels.quantize\nDequantization utilities for bitsandbytes integration.\n\n\nkernels.utils\nUtilities for axolotl.kernels submodules.\n\n\n\n\n\n\nRuntime patches for model optimizations\n\n\n\nmonkeypatch.llama_attn_hijack_flash\nFlash attention monkey patch for llama model\n\n\nmonkeypatch.llama_attn_hijack_xformers\nDirectly copied the code from https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/modules/llama_attn_hijack.py and made some adjustments\n\n\nmonkeypatch.mistral_attn_hijack_flash\nFlash attention monkey patch for mistral model\n\n\nmonkeypatch.multipack\nmultipack patching for v2 of sample packing\n\n\nmonkeypatch.relora\nImplements the ReLoRA training procedure from https://arxiv.org/abs/2307.05695, minus the initial full fine-tune.\n\n\nmonkeypatch.llama_expand_mask\nexpands the binary attention mask per 3.2.2 of https://arxiv.org/pdf/2107.02027.pdf\n\n\nmonkeypatch.lora_kernels\nModule for patching custom LoRA Triton kernels and torch.autograd functions.\n\n\nmonkeypatch.utils\nShared utils for the monkeypatches\n\n\nmonkeypatch.btlm_attn_hijack_flash\nFlash attention monkey patch for cerebras btlm model\n\n\nmonkeypatch.llama_patch_multipack\nPatched LlamaAttention to use torch.nn.functional.scaled_dot_product_attention\n\n\nmonkeypatch.stablelm_attn_hijack_flash\nPyTorch StableLM Epoch model.\n\n\nmonkeypatch.trainer_fsdp_optim\nfix for FSDP optimizer save in trainer w 4.47.0\n\n\nmonkeypatch.transformers_fa_utils\nsee https://github.com/huggingface/transformers/pull/35834\n\n\nmonkeypatch.unsloth_\nmodule for patching with unsloth optimizations\n\n\nmonkeypatch.attention.mllama\nMonkeypatch for Vision Llama for FA2 support\n\n\nmonkeypatch.data.batch_dataset_fetcher\nmonkey patches for the dataset fetcher to handle batches of packed indexes\n\n\nmonkeypatch.mixtral\nPatches to support multipack for mixtral\n\n\n\n\n\n\nUtility functions\n\n\n\nutils.models\nModule for models and model loading\n\n\nutils.tokenization\nModule for tokenization utilities\n\n\nutils.chat_templates\nThis module provides functionality for selecting chat templates based on user choices.\n\n\nutils.lora\nmodule to get the state dict of a merged lora model\n\n\nutils.lora_embeddings\nhelpers for lora embeddings\n\n\nutils.model_shard_quant\nmodule to handle loading model on cpu/meta device for FSDP\n\n\nutils.bench\nBenchmarking and measurement utilities\n\n\nutils.freeze\nmodule to freeze/unfreeze parameters by name\n\n\nutils.trainer\nModule containing the Trainer class and related functions\n\n\nutils.schedulers\nModule for custom LRScheduler class\n\n\nutils.distributed\nutility helpers for distributed checks\n\n\nutils.dict\nModule containing the DictDefault class\n\n\nutils.optimizers.adopt\nCopied from https://github.com/iShohei220/adopt\n\n\nutils.data.pretraining\ndata handling specific to pretraining\n\n\nutils.data.sft\ndata handling specific to SFT\n\n\nutils.gradient_checkpointing.unsloth\nUnsloth checkpointing\n\n\n\n\n\n\nPydantic data models for Axolotl config\n\n\n\nutils.schemas.config\nModule with Pydantic models for configuration.\n\n\nutils.schemas.model\nPydantic models for model input / output, etc. configuration\n\n\nutils.schemas.training\nPydantic models for training hyperparameters\n\n\nutils.schemas.datasets\nPydantic models for datasets-related configuration\n\n\nutils.schemas.peft\nPydantic models for PEFT-related configuration\n\n\nutils.schemas.trl\nPydantic models for TRL trainer configuration\n\n\nutils.schemas.multimodal\nPydantic models for multimodal-related configuration\n\n\nutils.schemas.integrations\nPydantic models for Axolotl integrations\n\n\nutils.schemas.enums\nEnums for Axolotl input config\n\n\nutils.schemas.utils\nUtilities for Axolotl Pydantic models\n\n\n\n\n\n\nThird-party integrations and extensions\n\n\n\nintegrations.base\nBase class for all plugins.\n\n\nintegrations.cut_cross_entropy.args\nModule for handling Cut Cross Entropy input arguments.\n\n\nintegrations.grokfast.optimizer\n\n\n\nintegrations.kd.trainer\nKD trainer\n\n\nintegrations.liger.args\nModule for handling LIGER input arguments.\n\n\nintegrations.lm_eval.args\nModule for handling lm eval harness input arguments.\n\n\nintegrations.spectrum.args\nModule for handling Spectrum input arguments.\n\n\n\n\n\n\nCommon utilities and shared functionality\n\n\n\ncommon.architectures\nCommon architecture specific constants\n\n\ncommon.const\nVarious shared constants\n\n\ncommon.datasets\nDataset loading utilities.\n\n\n\n\n\n\nCustom model implementations\n\n\n\nmodels.mamba.modeling_mamba\n\n\n\n\n\n\n\nData processing utilities\n\n\n\nutils.collators.core\nbasic shared collator constants\n\n\nutils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences\n\n\nutils.collators.mamba\ncollators for Mamba\n\n\nutils.collators.mm_chat\nCollators for multi-modal chat messages and packing\n\n\nutils.samplers.multipack\nMultipack Batch Sampler\n\n\n\n\n\n\nTraining callbacks\n\n\n\nutils.callbacks.perplexity\ncallback to calculate perplexity as an evaluation metric.\n\n\nutils.callbacks.profiler\nHF Trainer callback for creating pytorch profiling snapshots\n\n\nutils.callbacks.lisa\nmodule for LISA\n\n\nutils.callbacks.mlflow_\nMLFlow module for trainer callbacks\n\n\nutils.callbacks.comet_\nComet module for trainer callbacks"
   },
   {
     "objectID": "docs/api/index.html#core",
@@ -743,7 +743,7 @@
     "href": "docs/api/index.html#data-processing",
     "title": "API Reference",
     "section": "",
-    "text": "Data processing utilities\n\n\n\nutils.collators.core\nbasic shared collator constants\n\n\nutils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences. Also\n\n\nutils.collators.mamba\ncollators for Mamba\n\n\nutils.collators.mm_chat\nCollators for multi-modal chat messages and packing\n\n\nutils.samplers.multipack\nMultipack Batch Sampler"
+    "text": "Data processing utilities\n\n\n\nutils.collators.core\nbasic shared collator constants\n\n\nutils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences\n\n\nutils.collators.mamba\ncollators for Mamba\n\n\nutils.collators.mm_chat\nCollators for multi-modal chat messages and packing\n\n\nutils.samplers.multipack\nMultipack Batch Sampler"
   },
   {
     "objectID": "docs/api/index.html#callbacks",
@@ -2794,14 +2794,14 @@
     "href": "docs/api/utils.collators.batching.html",
     "title": "utils.collators.batching",
     "section": "",
-    "text": "utils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences. Also\nincludes logic for handling sequence parallelism collation.\n\n\n\n\n\nName\nDescription\n\n\n\n\nBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nDataCollatorForSeq2Seq\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\nPretrainingBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nV2BatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\n\n\n\nutils.collators.batching.BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntokenizer\n[PreTrainedTokenizer] or [PreTrainedTokenizerFast]\nThe tokenizer used for encoding the data.\nrequired\n\n\nmodel\n[PreTrainedModel]\nThe model that is being trained. If set and has the prepare_decoder_input_ids_from_labels, use it to prepare the decoder_input_ids This is useful when using label_smoothing to avoid calculating loss twice.\nNone\n\n\npadding\nbool, str or [~utils.PaddingStrategy], optional, defaults to True\nSelect a strategy to pad the returned sequences (according to the model’s padding side and padding index) among: - True or 'longest' (default): Pad to the longest sequence in the batch (or no padding if only a single sequence is provided). - 'max_length': Pad to a maximum length specified with the argument max_length or to the maximum acceptable input length for the model if that argument is not provided. - False or 'do_not_pad': No padding (i.e., can output a batch with sequences of different lengths).\nTrue\n\n\nmax_length\nint, optional\nMaximum length of the returned list and optionally padding length (see above).\nNone\n\n\npad_to_multiple_of\nint, optional\nIf set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability &gt;= 7.5 (Volta).\nNone\n\n\nlabel_pad_token_id\nint, optional, defaults to -100\nThe id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).\n-100\n\n\nreturn_tensors\nstr\nThe type of Tensor to return. Allowable values are “np”, “pt” and “tf”.\n'pt'\n\n\nsequence_parallel_degree\nint\nThe degree of sequence parallelism. Default to 1 for no sequence parallelism.\n1\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\napply_sequence_parallelism\nApply sequence parallelism slicing to a batch.\n\n\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism(\n    batch,\n)\nApply sequence parallelism slicing to a batch.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbatch\ndict[str, torch.Tensor]\nBatch dictionary from parent collator.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntorch.Tensor\nSliced batch dictionary.\n\n\n\n\n\n\n\n\n\nutils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(\n    self,\n    *args,\n    multipack_attn=True,\n    **kwargs,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nCollator for multipack specific to the using the BatchSampler"
+    "text": "utils.collators.batching\nData collators for axolotl to pad labels and position_ids for packed sequences\n\n\n\n\n\nName\nDescription\n\n\n\n\nBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nDataCollatorForSeq2Seq\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\nPretrainingBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nV2BatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\n\n\n\nutils.collators.batching.BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntokenizer\n[PreTrainedTokenizer] or [PreTrainedTokenizerFast]\nThe tokenizer used for encoding the data.\nrequired\n\n\nmodel\n[PreTrainedModel]\nThe model that is being trained. If set and has the prepare_decoder_input_ids_from_labels, use it to prepare the decoder_input_ids This is useful when using label_smoothing to avoid calculating loss twice.\nNone\n\n\npadding\nbool, str or [~utils.PaddingStrategy], optional, defaults to True\nSelect a strategy to pad the returned sequences (according to the model’s padding side and padding index) among: - True or 'longest' (default): Pad to the longest sequence in the batch (or no padding if only a single sequence is provided). - 'max_length': Pad to a maximum length specified with the argument max_length or to the maximum acceptable input length for the model if that argument is not provided. - False or 'do_not_pad': No padding (i.e., can output a batch with sequences of different lengths).\nTrue\n\n\nmax_length\nint, optional\nMaximum length of the returned list and optionally padding length (see above).\nNone\n\n\npad_to_multiple_of\nint, optional\nIf set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability &gt;= 7.5 (Volta).\nNone\n\n\nlabel_pad_token_id\nint, optional, defaults to -100\nThe id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).\n-100\n\n\nreturn_tensors\nstr\nThe type of Tensor to return. Allowable values are “np”, “pt” and “tf”.\n'pt'\n\n\n\n\n\n\n\nutils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(\n    self,\n    *args,\n    multipack_attn=True,\n    **kwargs,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nCollator for multipack specific to the using the BatchSampler"
   },
   {
     "objectID": "docs/api/utils.collators.batching.html#classes",
     "href": "docs/api/utils.collators.batching.html#classes",
     "title": "utils.collators.batching",
     "section": "",
-    "text": "Name\nDescription\n\n\n\n\nBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nDataCollatorForSeq2Seq\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\nPretrainingBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nV2BatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\n\n\n\nutils.collators.batching.BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntokenizer\n[PreTrainedTokenizer] or [PreTrainedTokenizerFast]\nThe tokenizer used for encoding the data.\nrequired\n\n\nmodel\n[PreTrainedModel]\nThe model that is being trained. If set and has the prepare_decoder_input_ids_from_labels, use it to prepare the decoder_input_ids This is useful when using label_smoothing to avoid calculating loss twice.\nNone\n\n\npadding\nbool, str or [~utils.PaddingStrategy], optional, defaults to True\nSelect a strategy to pad the returned sequences (according to the model’s padding side and padding index) among: - True or 'longest' (default): Pad to the longest sequence in the batch (or no padding if only a single sequence is provided). - 'max_length': Pad to a maximum length specified with the argument max_length or to the maximum acceptable input length for the model if that argument is not provided. - False or 'do_not_pad': No padding (i.e., can output a batch with sequences of different lengths).\nTrue\n\n\nmax_length\nint, optional\nMaximum length of the returned list and optionally padding length (see above).\nNone\n\n\npad_to_multiple_of\nint, optional\nIf set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability &gt;= 7.5 (Volta).\nNone\n\n\nlabel_pad_token_id\nint, optional, defaults to -100\nThe id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).\n-100\n\n\nreturn_tensors\nstr\nThe type of Tensor to return. Allowable values are “np”, “pt” and “tf”.\n'pt'\n\n\nsequence_parallel_degree\nint\nThe degree of sequence parallelism. Default to 1 for no sequence parallelism.\n1\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\napply_sequence_parallelism\nApply sequence parallelism slicing to a batch.\n\n\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq.apply_sequence_parallelism(\n    batch,\n)\nApply sequence parallelism slicing to a batch.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbatch\ndict[str, torch.Tensor]\nBatch dictionary from parent collator.\nrequired\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\n\ntorch.Tensor\nSliced batch dictionary.\n\n\n\n\n\n\n\n\n\nutils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(\n    self,\n    *args,\n    multipack_attn=True,\n    **kwargs,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n    sequence_parallel_degree=1,\n    ring_attn_func=None,\n)\nCollator for multipack specific to the using the BatchSampler"
+    "text": "Name\nDescription\n\n\n\n\nBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nDataCollatorForSeq2Seq\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\nPretrainingBatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\nV2BatchSamplerDataCollatorForSeq2Seq\nCollator for multipack specific to the using the BatchSampler\n\n\n\n\n\nutils.collators.batching.BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.DataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nData collator that will dynamically pad the inputs received, as well as the labels and position_ids\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntokenizer\n[PreTrainedTokenizer] or [PreTrainedTokenizerFast]\nThe tokenizer used for encoding the data.\nrequired\n\n\nmodel\n[PreTrainedModel]\nThe model that is being trained. If set and has the prepare_decoder_input_ids_from_labels, use it to prepare the decoder_input_ids This is useful when using label_smoothing to avoid calculating loss twice.\nNone\n\n\npadding\nbool, str or [~utils.PaddingStrategy], optional, defaults to True\nSelect a strategy to pad the returned sequences (according to the model’s padding side and padding index) among: - True or 'longest' (default): Pad to the longest sequence in the batch (or no padding if only a single sequence is provided). - 'max_length': Pad to a maximum length specified with the argument max_length or to the maximum acceptable input length for the model if that argument is not provided. - False or 'do_not_pad': No padding (i.e., can output a batch with sequences of different lengths).\nTrue\n\n\nmax_length\nint, optional\nMaximum length of the returned list and optionally padding length (see above).\nNone\n\n\npad_to_multiple_of\nint, optional\nIf set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability &gt;= 7.5 (Volta).\nNone\n\n\nlabel_pad_token_id\nint, optional, defaults to -100\nThe id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions).\n-100\n\n\nreturn_tensors\nstr\nThe type of Tensor to return. Allowable values are “np”, “pt” and “tf”.\n'pt'\n\n\n\n\n\n\n\nutils.collators.batching.PretrainingBatchSamplerDataCollatorForSeq2Seq(\n    self,\n    *args,\n    multipack_attn=True,\n    **kwargs,\n)\nCollator for multipack specific to the using the BatchSampler\n\n\n\nutils.collators.batching.V2BatchSamplerDataCollatorForSeq2Seq(\n    self,\n    tokenizer,\n    model=None,\n    padding=True,\n    max_length=None,\n    pad_to_multiple_of=None,\n    label_pad_token_id=-100,\n    position_pad_token_id=0,\n    return_tensors='pt',\n)\nCollator for multipack specific to the using the BatchSampler"
   },
   {
     "objectID": "docs/api/utils.schemas.datasets.html",
diff --git a/sitemap.xml b/sitemap.xml
index 6e1d6284e..d7cb62dde 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,682 +2,682 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
   <url>
     <loc>https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html</loc>
-    <lastmod>2025-04-24T17:01:57.365Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/index.html</loc>
-    <lastmod>2025-04-24T17:01:57.377Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.954Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/rlhf.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/unsloth.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset_preprocessing.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/input_output.html</loc>
-    <lastmod>2025-04-24T17:01:57.363Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset_loading.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.collators.mamba.html</loc>
-    <lastmod>2025-04-24T17:02:27.939Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.900Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html</loc>
-    <lastmod>2025-04-24T17:02:27.720Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.684Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html</loc>
-    <lastmod>2025-04-24T17:02:27.231Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.191Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.chat_templates.html</loc>
-    <lastmod>2025-04-24T17:02:27.622Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.586Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.merge_lora.html</loc>
-    <lastmod>2025-04-24T17:02:27.066Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.026Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html</loc>
-    <lastmod>2025-04-24T17:02:27.506Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.468Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.chat.format.shared.html</loc>
-    <lastmod>2025-04-24T17:02:26.943Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.902Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html</loc>
-    <lastmod>2025-04-24T17:02:27.792Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.757Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.freeze.html</loc>
-    <lastmod>2025-04-24T17:02:27.647Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.611Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html</loc>
-    <lastmod>2025-04-24T17:02:27.223Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.183Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.attention.mllama.html</loc>
-    <lastmod>2025-04-24T17:02:27.574Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.536Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.model.html</loc>
-    <lastmod>2025-04-24T17:02:27.740Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.705Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html</loc>
-    <lastmod>2025-04-24T17:02:26.956Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.915Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html</loc>
-    <lastmod>2025-04-24T17:02:27.549Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.511Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html</loc>
-    <lastmod>2025-04-24T17:02:27.910Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.876Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.datasets.chat.html</loc>
-    <lastmod>2025-04-24T17:02:26.948Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.907Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html</loc>
-    <lastmod>2025-04-24T17:02:27.636Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.599Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html</loc>
-    <lastmod>2025-04-24T17:02:27.550Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.513Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.lora_embeddings.html</loc>
-    <lastmod>2025-04-24T17:02:27.630Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.594Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.peft.html</loc>
-    <lastmod>2025-04-24T17:02:27.772Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.736Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html</loc>
-    <lastmod>2025-04-24T17:02:26.942Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.900Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html</loc>
-    <lastmod>2025-04-24T17:02:27.504Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.466Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html</loc>
-    <lastmod>2025-04-24T17:02:27.284Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.244Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html</loc>
-    <lastmod>2025-04-24T17:02:27.307Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.268Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.distributed.html</loc>
-    <lastmod>2025-04-24T17:02:27.709Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.673Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html</loc>
-    <lastmod>2025-04-24T17:02:27.278Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.238Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.dict.html</loc>
-    <lastmod>2025-04-24T17:02:27.712Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.676Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html</loc>
-    <lastmod>2025-04-24T17:02:27.488Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.450Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.base.html</loc>
-    <lastmod>2025-04-24T17:02:27.182Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.141Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html</loc>
-    <lastmod>2025-04-24T17:02:27.575Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.538Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/kernels.swiglu.html</loc>
-    <lastmod>2025-04-24T17:02:27.453Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.415Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.chat.messages.html</loc>
-    <lastmod>2025-04-24T17:02:26.939Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.897Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/index.html</loc>
-    <lastmod>2025-04-24T17:02:26.676Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.631Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html</loc>
-    <lastmod>2025-04-24T17:02:27.306Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.266Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/convert.html</loc>
-    <lastmod>2025-04-24T17:02:26.768Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.723Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html</loc>
-    <lastmod>2025-04-24T17:02:27.780Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.744Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.trainers.base.html</loc>
-    <lastmod>2025-04-24T17:02:27.153Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.112Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/evaluate.html</loc>
-    <lastmod>2025-04-24T17:02:26.747Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.702Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html</loc>
-    <lastmod>2025-04-24T17:02:27.490Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.452Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/kernels.quantize.html</loc>
-    <lastmod>2025-04-24T17:02:27.461Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.423Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html</loc>
-    <lastmod>2025-04-24T17:02:27.968Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.929Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html</loc>
-    <lastmod>2025-04-24T17:02:27.962Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.924Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html</loc>
-    <lastmod>2025-04-24T17:02:27.177Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.136Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.vllm_serve.html</loc>
-    <lastmod>2025-04-24T17:02:27.130Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.088Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/train.html</loc>
-    <lastmod>2025-04-24T17:02:26.737Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.692Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html</loc>
-    <lastmod>2025-04-24T17:02:27.304Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.265Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.trl.html</loc>
-    <lastmod>2025-04-24T17:02:27.775Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.739Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/kernels.geglu.html</loc>
-    <lastmod>2025-04-24T17:02:27.443Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.405Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.bench.html</loc>
-    <lastmod>2025-04-24T17:02:27.639Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.603Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html</loc>
-    <lastmod>2025-04-24T17:02:27.565Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.528Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.liger.args.html</loc>
-    <lastmod>2025-04-24T17:02:27.883Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.849Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.trainer_builder.html</loc>
-    <lastmod>2025-04-24T17:02:26.830Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.785Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.utils.html</loc>
-    <lastmod>2025-04-24T17:02:27.805Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.769Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/kernels.lora.html</loc>
-    <lastmod>2025-04-24T17:02:27.433Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.395Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html</loc>
-    <lastmod>2025-04-24T17:02:27.350Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.311Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html</loc>
-    <lastmod>2025-04-24T17:02:27.180Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.139Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html</loc>
-    <lastmod>2025-04-24T17:02:27.282Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.243Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.tokenization.html</loc>
-    <lastmod>2025-04-24T17:02:27.612Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.576Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html</loc>
-    <lastmod>2025-04-24T17:02:27.196Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.156Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html</loc>
-    <lastmod>2025-04-24T17:02:27.260Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.221Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html</loc>
-    <lastmod>2025-04-24T17:02:27.952Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.913Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.args.html</loc>
-    <lastmod>2025-04-24T17:02:27.020Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.979Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html</loc>
-    <lastmod>2025-04-24T17:02:27.959Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.920Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.gradient_checkpointing.unsloth.html</loc>
-    <lastmod>2025-04-24T17:02:27.726Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.690Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/mac.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/config.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/multimodal.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/lr_groups.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/index.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/pretraining.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/multi-node.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/sequence_parallelism.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/batch_vs_grad.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.937Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/amd_hpc.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.937Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/faq.html</loc>
-    <lastmod>2025-04-24T17:01:57.361Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/custom_integrations.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html</loc>
-    <lastmod>2025-04-24T17:01:57.380Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.958Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/TODO.html</loc>
-    <lastmod>2025-04-24T17:01:57.359Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.936Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
-    <lastmod>2025-04-24T17:01:57.380Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.958Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/getting-started.html</loc>
-    <lastmod>2025-04-24T17:01:57.361Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/multipack.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/multi-gpu.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/installation.html</loc>
-    <lastmod>2025-04-24T17:01:57.363Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/cli.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.937Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/tokenized.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/conversation.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/dataset-formats/template_free.html</loc>
-    <lastmod>2025-04-24T17:01:57.360Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/reward_modelling.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/lora_optims.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/nccl.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/logging_config.html</loc>
-    <lastmod>2025-04-24T17:02:26.815Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.770Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.trainer.html</loc>
-    <lastmod>2025-04-24T17:02:27.664Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.628Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html</loc>
-    <lastmod>2025-04-24T17:02:27.567Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.530Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.evaluate.html</loc>
-    <lastmod>2025-04-24T17:02:27.003Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.962Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/kernels.utils.html</loc>
-    <lastmod>2025-04-24T17:02:27.462Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.424Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/datasets.html</loc>
-    <lastmod>2025-04-24T17:02:26.754Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.710Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.models.html</loc>
-    <lastmod>2025-04-24T17:02:27.605Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.569Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html</loc>
-    <lastmod>2025-04-24T17:02:27.316Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.277Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.preprocess.html</loc>
-    <lastmod>2025-04-24T17:02:27.088Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.046Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html</loc>
-    <lastmod>2025-04-24T17:02:27.080Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.037Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.base.html</loc>
-    <lastmod>2025-04-24T17:02:27.868Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.834Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html</loc>
-    <lastmod>2025-04-24T17:02:27.346Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.308Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.enums.html</loc>
-    <lastmod>2025-04-24T17:02:27.799Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.764Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html</loc>
-    <lastmod>2025-04-24T17:02:27.971Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.933Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html</loc>
-    <lastmod>2025-04-24T17:02:27.256Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.216Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schedulers.html</loc>
-    <lastmod>2025-04-24T17:02:27.688Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.653Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.cloud.base.html</loc>
-    <lastmod>2025-04-24T17:02:27.133Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.092Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.utils.html</loc>
-    <lastmod>2025-04-24T17:02:27.125Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.084Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html</loc>
-    <lastmod>2025-04-24T17:02:27.539Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.502Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html</loc>
-    <lastmod>2025-04-24T17:02:27.250Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.210Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.config.html</loc>
-    <lastmod>2025-04-24T17:02:27.734Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.698Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html</loc>
-    <lastmod>2025-04-24T17:02:27.514Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.476Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.config.html</loc>
-    <lastmod>2025-04-24T17:02:27.044Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.003Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.collators.core.html</loc>
-    <lastmod>2025-04-24T17:02:27.911Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.878Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.data.sft.html</loc>
-    <lastmod>2025-04-24T17:02:27.722Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.687Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html</loc>
-    <lastmod>2025-04-24T17:02:27.890Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.856Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html</loc>
-    <lastmod>2025-04-24T17:02:27.294Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.254Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.inference.html</loc>
-    <lastmod>2025-04-24T17:02:27.058Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.017Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html</loc>
-    <lastmod>2025-04-24T17:02:27.211Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.171Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.collators.batching.html</loc>
-    <lastmod>2025-04-24T17:02:27.936Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.897Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html</loc>
-    <lastmod>2025-04-24T17:02:27.763Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.727Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.data.pretraining.html</loc>
-    <lastmod>2025-04-24T17:02:27.721Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.685Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html</loc>
-    <lastmod>2025-04-24T17:02:27.308Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.269Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.utils.html</loc>
-    <lastmod>2025-04-24T17:02:27.547Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.510Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html</loc>
-    <lastmod>2025-04-24T17:02:27.964Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.925Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html</loc>
-    <lastmod>2025-04-24T17:02:27.210Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.169Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html</loc>
-    <lastmod>2025-04-24T17:02:27.326Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.287Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.training_args.html</loc>
-    <lastmod>2025-04-24T17:02:26.916Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.874Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html</loc>
-    <lastmod>2025-04-24T17:02:27.944Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.905Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/common.architectures.html</loc>
-    <lastmod>2025-04-24T17:02:27.891Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.857Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html</loc>
-    <lastmod>2025-04-24T17:02:27.880Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.846Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html</loc>
-    <lastmod>2025-04-24T17:02:27.244Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.204Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.trainers.trl.html</loc>
-    <lastmod>2025-04-24T17:02:27.170Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.129Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html</loc>
-    <lastmod>2025-04-24T17:02:27.271Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.231Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html</loc>
-    <lastmod>2025-04-24T17:02:27.871Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.837Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.lora.html</loc>
-    <lastmod>2025-04-24T17:02:27.627Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.591Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.sweeps.html</loc>
-    <lastmod>2025-04-24T17:02:27.094Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.052Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html</loc>
-    <lastmod>2025-04-24T17:02:26.940Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.899Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/common.const.html</loc>
-    <lastmod>2025-04-24T17:02:27.893Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.859Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html</loc>
-    <lastmod>2025-04-24T17:02:27.267Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.228Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html</loc>
-    <lastmod>2025-04-24T17:02:27.556Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.518Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html</loc>
-    <lastmod>2025-04-24T17:02:27.872Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.838Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.checks.html</loc>
-    <lastmod>2025-04-24T17:02:27.027Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.986Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/common.datasets.html</loc>
-    <lastmod>2025-04-24T17:02:27.909Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.875Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html</loc>
-    <lastmod>2025-04-24T17:02:27.886Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.852Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html</loc>
-    <lastmod>2025-04-24T17:02:27.577Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.539Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html</loc>
-    <lastmod>2025-04-24T17:02:27.325Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.285Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.train.html</loc>
-    <lastmod>2025-04-24T17:02:26.995Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.954Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/prompt_tokenizers.html</loc>
-    <lastmod>2025-04-24T17:02:26.809Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.765Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/utils.schemas.training.html</loc>
-    <lastmod>2025-04-24T17:02:27.745Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.710Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.relora.html</loc>
-    <lastmod>2025-04-24T17:02:27.512Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.475Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html</loc>
-    <lastmod>2025-04-24T17:02:27.139Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.098Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/cli.main.html</loc>
-    <lastmod>2025-04-24T17:02:26.987Z</lastmod>
+    <lastmod>2025-04-25T14:34:35.946Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html</loc>
-    <lastmod>2025-04-24T17:02:27.559Z</lastmod>
+    <lastmod>2025-04-25T14:34:36.522Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/fsdp_qlora.html</loc>
-    <lastmod>2025-04-24T17:01:57.361Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/debugging.html</loc>
-    <lastmod>2025-04-24T17:01:57.361Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/ray-integration.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/docker.html</loc>
-    <lastmod>2025-04-24T17:01:57.361Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.938Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/inference.html</loc>
-    <lastmod>2025-04-24T17:01:57.363Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.941Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/docs/torchao.html</loc>
-    <lastmod>2025-04-24T17:01:57.364Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.942Z</lastmod>
   </url>
   <url>
     <loc>https://docs.axolotl.ai/FAQS.html</loc>
-    <lastmod>2025-04-24T17:01:57.358Z</lastmod>
+    <lastmod>2025-04-25T14:34:06.936Z</lastmod>
   </url>
 </urlset>

The type of Tensor to return. Allowable values are “np”, “pt” and “tf”.	`'pt'`
sequence_parallel_degree	`int`	The degree of sequence parallelism. Default to 1 for no sequence parallelism.	`1`