Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-04-16 17:53:19 +00:00
parent 256d474bda
commit 42b3a43d66
8 changed files with 232 additions and 216 deletions

View File

@@ -29,7 +29,7 @@ jobs:
cuda_version: 12.4.1
python_version: "3.11"
pytorch: 2.6.0
axolotl_extras:
axolotl_extras: vllm
is_latest: true
runs-on: axolotl-gpu-runner
steps:

View File

@@ -1 +1 @@
39794ace
9eefd995

View File

@@ -558,10 +558,11 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb1-44"><a href="#cb1-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-45"><a href="#cb1-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-46"><a href="#cb1-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb1-47"><a href="#cb1-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-48"><a href="#cb1-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-49"><a href="#cb1-49" aria-hidden="true" tabindex="-1"></a> simpo_gamma<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-50"><a href="#cb1-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-47"><a href="#cb1-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-48"><a href="#cb1-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-49"><a href="#cb1-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-50"><a href="#cb1-50" aria-hidden="true" tabindex="-1"></a> simpo_gamma<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-51"><a href="#cb1-51" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>CPO config for CPO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlKTOConfig" class="level3">
@@ -612,9 +613,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb2-44"><a href="#cb2-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-45"><a href="#cb2-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-46"><a href="#cb2-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb2-47"><a href="#cb2-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-48"><a href="#cb2-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-49"><a href="#cb2-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb2-47"><a href="#cb2-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-48"><a href="#cb2-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-49"><a href="#cb2-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-50"><a href="#cb2-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>KTO config for KTO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlORPOConfig" class="level3">
@@ -665,9 +667,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>ORPO config for ORPO training</p>
</section>
<section id="axolotl.core.training_args.AxolotlPRMConfig" class="level3">
@@ -718,9 +721,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb4-50"><a href="#cb4-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>PRM config for PRM training</p>
</section>
<section id="axolotl.core.training_args.AxolotlRewardConfig" class="level3">
@@ -771,9 +775,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb5-44"><a href="#cb5-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-45"><a href="#cb5-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-46"><a href="#cb5-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb5-47"><a href="#cb5-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-48"><a href="#cb5-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-49"><a href="#cb5-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb5-47"><a href="#cb5-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-48"><a href="#cb5-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-49"><a href="#cb5-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-50"><a href="#cb5-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Reward config for Reward training</p>
</section>
<section id="axolotl.core.training_args.AxolotlTrainingArguments" class="level3">
@@ -824,9 +829,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb6-44"><a href="#cb6-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-45"><a href="#cb6-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-46"><a href="#cb6-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb6-47"><a href="#cb6-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-48"><a href="#cb6-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-49"><a href="#cb6-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb6-47"><a href="#cb6-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-48"><a href="#cb6-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-49"><a href="#cb6-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb6-50"><a href="#cb6-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Training arguments for Causal trainer</p>
<p>This code is duplicated because HF TrainingArguments does not set output_dir with a
default value, so it can't be used as a mixin.</p>
@@ -879,9 +885,10 @@ default value so it cant be used as a mixin.</p>
<span id="cb7-44"><a href="#cb7-44" aria-hidden="true" tabindex="-1"></a> kd_zscore_base_temp<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-45"><a href="#cb7-45" aria-hidden="true" tabindex="-1"></a> kd_top_k_before_softmax<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-46"><a href="#cb7-46" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb7-47"><a href="#cb7-47" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-48"><a href="#cb7-48" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-49"><a href="#cb7-49" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb7-47"><a href="#cb7-47" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-48"><a href="#cb7-48" aria-hidden="true" tabindex="-1"></a> image_size<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-49"><a href="#cb7-49" aria-hidden="true" tabindex="-1"></a> image_resize_algorithm<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb7-50"><a href="#cb7-50" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Mixin class for the Axolotl training args.</p>

View File

@@ -509,7 +509,8 @@ includes logic for handling sequence parallelism collation.</p>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a> position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Collator for multipack, specific to use with the BatchSampler</p>
</section>
<section id="axolotl.utils.collators.batching.DataCollatorForSeq2Seq" class="level3">
@@ -525,7 +526,8 @@ includes logic for handling sequence parallelism collation.</p>
<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a> position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a> return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Data collator that will dynamically pad the inputs received, as well as the labels and position_ids</p>
<section id="parameters" class="level4 doc-section doc-section-parameters">
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h4>
@@ -690,7 +692,8 @@ includes logic for handling sequence parallelism collation.</p>
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a> position_pad_token_id<span class="op">=</span><span class="dv">0</span>,</span>
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a> return_tensors<span class="op">=</span><span class="st">'pt'</span>,</span>
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a> sequence_parallel_degree<span class="op">=</span><span class="dv">1</span>,</span>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a> ring_attn_func<span class="op">=</span><span class="va">None</span>,</span>
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Collator for multipack, specific to use with the BatchSampler</p>

View File

@@ -1159,21 +1159,24 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb1-687"><a href="#cb1-687" aria-hidden="true" tabindex="-1"></a><span class="co"># Optional; strides across the key dimension. Larger values use more memory but should make training faster.</span></span>
<span id="cb1-688"><a href="#cb1-688" aria-hidden="true" tabindex="-1"></a><span class="co"># Must evenly divide the number of KV heads in your model.</span></span>
<span id="cb1-689"><a href="#cb1-689" aria-hidden="true" tabindex="-1"></a><span class="fu">heads_k_stride</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
<span id="cb1-690"><a href="#cb1-690" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-691"><a href="#cb1-691" aria-hidden="true" tabindex="-1"></a><span class="co"># Path to torch distx for optim 'adamw_anyprecision'</span></span>
<span id="cb1-692"><a href="#cb1-692" aria-hidden="true" tabindex="-1"></a><span class="fu">torchdistx_path</span><span class="kw">:</span></span>
<span id="cb1-690"><a href="#cb1-690" aria-hidden="true" tabindex="-1"></a><span class="co"># One of "varlen_llama3", "batch_ring", "batch_zigzag", "batch_stripe". Defaults to "varlen_llama3"</span></span>
<span id="cb1-691"><a href="#cb1-691" aria-hidden="true" tabindex="-1"></a><span class="co"># in the sample packing case, and "batch_ring" in the non-sample packing case.</span></span>
<span id="cb1-692"><a href="#cb1-692" aria-hidden="true" tabindex="-1"></a><span class="fu">ring_attn_func</span><span class="kw">:</span></span>
<span id="cb1-693"><a href="#cb1-693" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-694"><a href="#cb1-694" aria-hidden="true" tabindex="-1"></a><span class="co"># Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize</span></span>
<span id="cb1-695"><a href="#cb1-695" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span></span>
<span id="cb1-694"><a href="#cb1-694" aria-hidden="true" tabindex="-1"></a><span class="co"># Path to torch distx for optim 'adamw_anyprecision'</span></span>
<span id="cb1-695"><a href="#cb1-695" aria-hidden="true" tabindex="-1"></a><span class="fu">torchdistx_path</span><span class="kw">:</span></span>
<span id="cb1-696"><a href="#cb1-696" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-697"><a href="#cb1-697" aria-hidden="true" tabindex="-1"></a><span class="co"># Debug mode</span></span>
<span id="cb1-698"><a href="#cb1-698" aria-hidden="true" tabindex="-1"></a><span class="fu">debug</span><span class="kw">:</span></span>
<span id="cb1-697"><a href="#cb1-697" aria-hidden="true" tabindex="-1"></a><span class="co"># Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize</span></span>
<span id="cb1-698"><a href="#cb1-698" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span></span>
<span id="cb1-699"><a href="#cb1-699" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-700"><a href="#cb1-700" aria-hidden="true" tabindex="-1"></a><span class="co"># Seed</span></span>
<span id="cb1-701"><a href="#cb1-701" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span></span>
<span id="cb1-700"><a href="#cb1-700" aria-hidden="true" tabindex="-1"></a><span class="co"># Debug mode</span></span>
<span id="cb1-701"><a href="#cb1-701" aria-hidden="true" tabindex="-1"></a><span class="fu">debug</span><span class="kw">:</span></span>
<span id="cb1-702"><a href="#cb1-702" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-703"><a href="#cb1-703" aria-hidden="true" tabindex="-1"></a><span class="co"># Allow overwriting the yml config from the cli</span></span>
<span id="cb1-704"><a href="#cb1-704" aria-hidden="true" tabindex="-1"></a><span class="fu">strict</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-703"><a href="#cb1-703" aria-hidden="true" tabindex="-1"></a><span class="co"># Seed</span></span>
<span id="cb1-704"><a href="#cb1-704" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span></span>
<span id="cb1-705"><a href="#cb1-705" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-706"><a href="#cb1-706" aria-hidden="true" tabindex="-1"></a><span class="co"># Allow overwriting the yml config from the cli</span></span>
<span id="cb1-707"><a href="#cb1-707" aria-hidden="true" tabindex="-1"></a><span class="fu">strict</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>

View File

@@ -507,7 +507,10 @@ through a ring communication pattern.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Set to a divisor (&gt; 1) of the number of GPUs available</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">sequence_parallel_degree</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span><span class="co"> # Split sequences across 4 GPUs</span></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Optional; strides across the key dimension. Larger values use more memory but should make training faster.</span></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">heads_k_stride</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">heads_k_stride</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Optional; one of "varlen_llama3", "batch_ring", "batch_zigzag", "batch_stripe". Defaults to</span></span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="co"># "varlen_llama3" when `sample_packing: true`, and "batch_ring" otherwise.</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="fu">ring_attn_func</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The <code>sequence_parallel_degree</code> should be a divisor of the total number of GPUs. For example:</p>
<ul>
<li>With 8 GPUs, valid values would be 2, 4, or 8</li>

File diff suppressed because one or more lines are too long

View File

@@ -2,682 +2,682 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html</loc>
<lastmod>2025-04-16T05:17:22.344Z</lastmod>
<lastmod>2025-04-16T17:51:04.412Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/template_free.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/tokenized.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/nccl.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/amd_hpc.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/config.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multi-gpu.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/installation.html</loc>
<lastmod>2025-04-16T05:17:22.342Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/torchao.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/reward_modelling.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/input_output.html</loc>
<lastmod>2025-04-16T05:17:22.342Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multimodal.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html</loc>
<lastmod>2025-04-16T05:17:53.625Z</lastmod>
<lastmod>2025-04-16T17:51:40.554Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html</loc>
<lastmod>2025-04-16T05:17:53.222Z</lastmod>
<lastmod>2025-04-16T17:51:40.134Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html</loc>
<lastmod>2025-04-16T05:17:53.238Z</lastmod>
<lastmod>2025-04-16T17:51:40.151Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html</loc>
<lastmod>2025-04-16T05:17:52.930Z</lastmod>
<lastmod>2025-04-16T17:51:39.827Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html</loc>
<lastmod>2025-04-16T05:17:53.168Z</lastmod>
<lastmod>2025-04-16T17:51:40.078Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html</loc>
<lastmod>2025-04-16T05:17:52.976Z</lastmod>
<lastmod>2025-04-16T17:51:39.875Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.liger.args.html</loc>
<lastmod>2025-04-16T05:17:53.543Z</lastmod>
<lastmod>2025-04-16T17:51:40.468Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.training.html</loc>
<lastmod>2025-04-16T05:17:53.407Z</lastmod>
<lastmod>2025-04-16T17:51:40.327Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/datasets.html</loc>
<lastmod>2025-04-16T05:17:52.433Z</lastmod>
<lastmod>2025-04-16T17:51:39.305Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.geglu.html</loc>
<lastmod>2025-04-16T05:17:53.109Z</lastmod>
<lastmod>2025-04-16T17:51:40.016Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html</loc>
<lastmod>2025-04-16T05:17:53.153Z</lastmod>
<lastmod>2025-04-16T17:51:40.062Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.sweeps.html</loc>
<lastmod>2025-04-16T05:17:52.766Z</lastmod>
<lastmod>2025-04-16T17:51:39.653Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.freeze.html</loc>
<lastmod>2025-04-16T05:17:53.309Z</lastmod>
<lastmod>2025-04-16T17:51:40.225Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html</loc>
<lastmod>2025-04-16T05:17:53.170Z</lastmod>
<lastmod>2025-04-16T17:51:40.079Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.main.html</loc>
<lastmod>2025-04-16T05:17:52.663Z</lastmod>
<lastmod>2025-04-16T17:51:39.545Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.trl.html</loc>
<lastmod>2025-04-16T05:17:52.841Z</lastmod>
<lastmod>2025-04-16T17:51:39.733Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html</loc>
<lastmod>2025-04-16T05:17:52.977Z</lastmod>
<lastmod>2025-04-16T17:51:39.876Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html</loc>
<lastmod>2025-04-16T05:17:52.617Z</lastmod>
<lastmod>2025-04-16T17:51:39.499Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html</loc>
<lastmod>2025-04-16T05:17:52.632Z</lastmod>
<lastmod>2025-04-16T17:51:39.514Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html</loc>
<lastmod>2025-04-16T05:17:52.994Z</lastmod>
<lastmod>2025-04-16T17:51:39.894Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mamba.html</loc>
<lastmod>2025-04-16T05:17:53.598Z</lastmod>
<lastmod>2025-04-16T17:51:40.526Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.base.html</loc>
<lastmod>2025-04-16T05:17:53.528Z</lastmod>
<lastmod>2025-04-16T17:51:40.452Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.bench.html</loc>
<lastmod>2025-04-16T05:17:53.302Z</lastmod>
<lastmod>2025-04-16T17:51:40.217Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.swiglu.html</loc>
<lastmod>2025-04-16T05:17:53.119Z</lastmod>
<lastmod>2025-04-16T17:51:40.026Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.shared.html</loc>
<lastmod>2025-04-16T05:17:52.619Z</lastmod>
<lastmod>2025-04-16T17:51:39.500Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html</loc>
<lastmod>2025-04-16T05:17:53.531Z</lastmod>
<lastmod>2025-04-16T17:51:40.455Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.datasets.chat.html</loc>
<lastmod>2025-04-16T05:17:52.624Z</lastmod>
<lastmod>2025-04-16T17:51:39.506Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html</loc>
<lastmod>2025-04-16T05:17:53.622Z</lastmod>
<lastmod>2025-04-16T17:51:40.550Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html</loc>
<lastmod>2025-04-16T05:17:53.532Z</lastmod>
<lastmod>2025-04-16T17:51:40.456Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html</loc>
<lastmod>2025-04-16T05:17:52.880Z</lastmod>
<lastmod>2025-04-16T17:51:39.774Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html</loc>
<lastmod>2025-04-16T05:17:52.881Z</lastmod>
<lastmod>2025-04-16T17:51:39.776Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html</loc>
<lastmod>2025-04-16T05:17:52.993Z</lastmod>
<lastmod>2025-04-16T17:51:39.893Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html</loc>
<lastmod>2025-04-16T05:17:53.453Z</lastmod>
<lastmod>2025-04-16T17:51:40.374Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.trl.html</loc>
<lastmod>2025-04-16T05:17:53.436Z</lastmod>
<lastmod>2025-04-16T17:51:40.357Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_tokenizers.html</loc>
<lastmod>2025-04-16T05:17:52.488Z</lastmod>
<lastmod>2025-04-16T17:51:39.362Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.data.sft.html</loc>
<lastmod>2025-04-16T05:17:53.384Z</lastmod>
<lastmod>2025-04-16T17:51:40.303Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schedulers.html</loc>
<lastmod>2025-04-16T05:17:53.350Z</lastmod>
<lastmod>2025-04-16T17:51:40.267Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.chat_templates.html</loc>
<lastmod>2025-04-16T05:17:53.285Z</lastmod>
<lastmod>2025-04-16T17:51:40.199Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.models.html</loc>
<lastmod>2025-04-16T05:17:53.268Z</lastmod>
<lastmod>2025-04-16T17:51:40.182Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html</loc>
<lastmod>2025-04-16T05:17:52.973Z</lastmod>
<lastmod>2025-04-16T17:51:39.872Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.distributed.html</loc>
<lastmod>2025-04-16T05:17:53.371Z</lastmod>
<lastmod>2025-04-16T17:51:40.289Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.utils.html</loc>
<lastmod>2025-04-16T05:17:53.211Z</lastmod>
<lastmod>2025-04-16T17:51:40.122Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.utils.html</loc>
<lastmod>2025-04-16T05:17:53.465Z</lastmod>
<lastmod>2025-04-16T17:51:40.387Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html</loc>
<lastmod>2025-04-16T05:17:53.178Z</lastmod>
<lastmod>2025-04-16T17:51:40.088Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.datasets.html</loc>
<lastmod>2025-04-16T05:17:53.568Z</lastmod>
<lastmod>2025-04-16T17:51:40.494Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/logging_config.html</loc>
<lastmod>2025-04-16T05:17:52.493Z</lastmod>
<lastmod>2025-04-16T17:51:39.367Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.quantize.html</loc>
<lastmod>2025-04-16T05:17:53.126Z</lastmod>
<lastmod>2025-04-16T17:51:40.034Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html</loc>
<lastmod>2025-04-16T05:17:53.214Z</lastmod>
<lastmod>2025-04-16T17:51:40.125Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.model.html</loc>
<lastmod>2025-04-16T05:17:53.402Z</lastmod>
<lastmod>2025-04-16T17:51:40.321Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html</loc>
<lastmod>2025-04-16T05:17:53.219Z</lastmod>
<lastmod>2025-04-16T17:51:40.131Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html</loc>
<lastmod>2025-04-16T05:17:53.240Z</lastmod>
<lastmod>2025-04-16T17:51:40.152Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.tokenization.html</loc>
<lastmod>2025-04-16T05:17:53.275Z</lastmod>
<lastmod>2025-04-16T17:51:40.189Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html</loc>
<lastmod>2025-04-16T05:17:53.540Z</lastmod>
<lastmod>2025-04-16T17:51:40.464Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html</loc>
<lastmod>2025-04-16T05:17:53.424Z</lastmod>
<lastmod>2025-04-16T17:51:40.344Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.core.html</loc>
<lastmod>2025-04-16T05:17:53.571Z</lastmod>
<lastmod>2025-04-16T17:51:40.497Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html</loc>
<lastmod>2025-04-16T05:17:53.212Z</lastmod>
<lastmod>2025-04-16T17:51:40.123Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html</loc>
<lastmod>2025-04-16T05:17:53.382Z</lastmod>
<lastmod>2025-04-16T17:51:40.300Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html</loc>
<lastmod>2025-04-16T05:17:52.926Z</lastmod>
<lastmod>2025-04-16T17:51:39.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/index.html</loc>
<lastmod>2025-04-16T05:17:52.356Z</lastmod>
<lastmod>2025-04-16T17:51:39.225Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html</loc>
<lastmod>2025-04-16T05:17:52.810Z</lastmod>
<lastmod>2025-04-16T17:51:39.700Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html</loc>
<lastmod>2025-04-16T05:17:52.963Z</lastmod>
<lastmod>2025-04-16T17:51:39.862Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.train.html</loc>
<lastmod>2025-04-16T05:17:52.671Z</lastmod>
<lastmod>2025-04-16T17:51:39.554Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainer_builder.html</loc>
<lastmod>2025-04-16T05:17:52.508Z</lastmod>
<lastmod>2025-04-16T17:51:39.383Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html</loc>
<lastmod>2025-04-16T05:17:53.617Z</lastmod>
<lastmod>2025-04-16T17:51:40.545Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/getting-started.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset_loading.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/batch_vs_grad.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/faq.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/debugging.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/lr_groups.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/TODO.html</loc>
<lastmod>2025-04-16T05:17:22.338Z</lastmod>
<lastmod>2025-04-16T17:51:04.406Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html</loc>
<lastmod>2025-04-16T05:17:22.359Z</lastmod>
<lastmod>2025-04-16T17:51:04.427Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/index.html</loc>
<lastmod>2025-04-16T05:17:22.355Z</lastmod>
<lastmod>2025-04-16T17:51:04.424Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
<lastmod>2025-04-16T05:17:22.359Z</lastmod>
<lastmod>2025-04-16T17:51:04.427Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/FAQS.html</loc>
<lastmod>2025-04-16T05:17:22.338Z</lastmod>
<lastmod>2025-04-16T17:51:04.406Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multi-node.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/sequence_parallelism.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multipack.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/inference.html</loc>
<lastmod>2025-04-16T05:17:22.342Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/lora_optims.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.lora_embeddings.html</loc>
<lastmod>2025-04-16T05:17:53.293Z</lastmod>
<lastmod>2025-04-16T17:51:40.207Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.utils.html</loc>
<lastmod>2025-04-16T05:17:53.128Z</lastmod>
<lastmod>2025-04-16T17:51:40.035Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html</loc>
<lastmod>2025-04-16T05:17:52.866Z</lastmod>
<lastmod>2025-04-16T17:51:39.760Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/convert.html</loc>
<lastmod>2025-04-16T05:17:52.447Z</lastmod>
<lastmod>2025-04-16T17:51:39.319Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.const.html</loc>
<lastmod>2025-04-16T05:17:53.552Z</lastmod>
<lastmod>2025-04-16T17:51:40.477Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.base.html</loc>
<lastmod>2025-04-16T05:17:52.804Z</lastmod>
<lastmod>2025-04-16T17:51:39.693Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.relora.html</loc>
<lastmod>2025-04-16T05:17:53.177Z</lastmod>
<lastmod>2025-04-16T17:51:40.087Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.lora.html</loc>
<lastmod>2025-04-16T05:17:53.289Z</lastmod>
<lastmod>2025-04-16T17:51:40.204Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.merge_lora.html</loc>
<lastmod>2025-04-16T05:17:52.740Z</lastmod>
<lastmod>2025-04-16T17:51:39.627Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html</loc>
<lastmod>2025-04-16T05:17:53.018Z</lastmod>
<lastmod>2025-04-16T17:51:39.919Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html</loc>
<lastmod>2025-04-16T05:17:52.752Z</lastmod>
<lastmod>2025-04-16T17:51:39.639Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html</loc>
<lastmod>2025-04-16T05:17:53.549Z</lastmod>
<lastmod>2025-04-16T17:51:40.474Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html</loc>
<lastmod>2025-04-16T05:17:53.569Z</lastmod>
<lastmod>2025-04-16T17:51:40.495Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.architectures.html</loc>
<lastmod>2025-04-16T05:17:53.551Z</lastmod>
<lastmod>2025-04-16T17:51:40.476Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.trainer.html</loc>
<lastmod>2025-04-16T05:17:53.326Z</lastmod>
<lastmod>2025-04-16T17:51:40.242Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html</loc>
<lastmod>2025-04-16T05:17:53.629Z</lastmod>
<lastmod>2025-04-16T17:51:40.558Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.vllm_serve.html</loc>
<lastmod>2025-04-16T05:17:52.801Z</lastmod>
<lastmod>2025-04-16T17:51:39.690Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html</loc>
<lastmod>2025-04-16T05:17:53.441Z</lastmod>
<lastmod>2025-04-16T17:51:40.362Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.gradient_checkpointing.unsloth.html</loc>
<lastmod>2025-04-16T05:17:53.388Z</lastmod>
<lastmod>2025-04-16T17:51:40.306Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.base.html</loc>
<lastmod>2025-04-16T05:17:52.824Z</lastmod>
<lastmod>2025-04-16T17:51:39.715Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html</loc>
<lastmod>2025-04-16T05:17:53.230Z</lastmod>
<lastmod>2025-04-16T17:51:40.142Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html</loc>
<lastmod>2025-04-16T05:17:53.610Z</lastmod>
<lastmod>2025-04-16T17:51:40.539Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html</loc>
<lastmod>2025-04-16T05:17:53.620Z</lastmod>
<lastmod>2025-04-16T17:51:40.549Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html</loc>
<lastmod>2025-04-16T05:17:53.546Z</lastmod>
<lastmod>2025-04-16T17:51:40.471Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.data.pretraining.html</loc>
<lastmod>2025-04-16T05:17:53.383Z</lastmod>
<lastmod>2025-04-16T17:51:40.301Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/evaluate.html</loc>
<lastmod>2025-04-16T05:17:52.426Z</lastmod>
<lastmod>2025-04-16T17:51:39.298Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.dict.html</loc>
<lastmod>2025-04-16T05:17:53.374Z</lastmod>
<lastmod>2025-04-16T17:51:40.292Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.html</loc>
<lastmod>2025-04-16T05:17:52.796Z</lastmod>
<lastmod>2025-04-16T17:51:39.685Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html</loc>
<lastmod>2025-04-16T05:17:52.947Z</lastmod>
<lastmod>2025-04-16T17:51:39.845Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.training_args.html</loc>
<lastmod>2025-04-16T05:17:52.592Z</lastmod>
<lastmod>2025-04-16T17:51:39.473Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.inference.html</loc>
<lastmod>2025-04-16T05:17:52.732Z</lastmod>
<lastmod>2025-04-16T17:51:39.618Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.lora.html</loc>
<lastmod>2025-04-16T05:17:53.099Z</lastmod>
<lastmod>2025-04-16T17:51:40.005Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.evaluate.html</loc>
<lastmod>2025-04-16T05:17:52.679Z</lastmod>
<lastmod>2025-04-16T17:51:39.562Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.batching.html</loc>
<lastmod>2025-04-16T05:17:53.594Z</lastmod>
<lastmod>2025-04-16T17:51:40.522Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html</loc>
<lastmod>2025-04-16T05:17:52.920Z</lastmod>
<lastmod>2025-04-16T17:51:39.816Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html</loc>
<lastmod>2025-04-16T05:17:52.974Z</lastmod>
<lastmod>2025-04-16T17:51:39.873Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html</loc>
<lastmod>2025-04-16T05:17:52.937Z</lastmod>
<lastmod>2025-04-16T17:51:39.834Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html</loc>
<lastmod>2025-04-16T05:17:53.015Z</lastmod>
<lastmod>2025-04-16T17:51:39.915Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html</loc>
<lastmod>2025-04-16T05:17:52.893Z</lastmod>
<lastmod>2025-04-16T17:51:39.788Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html</loc>
<lastmod>2025-04-16T05:17:53.298Z</lastmod>
<lastmod>2025-04-16T17:51:40.213Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.config.html</loc>
<lastmod>2025-04-16T05:17:52.718Z</lastmod>
<lastmod>2025-04-16T17:51:39.604Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.enums.html</loc>
<lastmod>2025-04-16T05:17:53.460Z</lastmod>
<lastmod>2025-04-16T17:51:40.381Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.preprocess.html</loc>
<lastmod>2025-04-16T05:17:52.760Z</lastmod>
<lastmod>2025-04-16T17:51:39.647Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.messages.html</loc>
<lastmod>2025-04-16T05:17:52.614Z</lastmod>
<lastmod>2025-04-16T17:51:39.496Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html</loc>
<lastmod>2025-04-16T05:17:52.953Z</lastmod>
<lastmod>2025-04-16T17:51:39.851Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.peft.html</loc>
<lastmod>2025-04-16T05:17:53.433Z</lastmod>
<lastmod>2025-04-16T17:51:40.353Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/train.html</loc>
<lastmod>2025-04-16T05:17:52.416Z</lastmod>
<lastmod>2025-04-16T17:51:39.287Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html</loc>
<lastmod>2025-04-16T05:17:52.951Z</lastmod>
<lastmod>2025-04-16T17:51:39.849Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html</loc>
<lastmod>2025-04-16T05:17:52.940Z</lastmod>
<lastmod>2025-04-16T17:51:39.838Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html</loc>
<lastmod>2025-04-16T05:17:53.602Z</lastmod>
<lastmod>2025-04-16T17:51:40.530Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html</loc>
<lastmod>2025-04-16T05:17:52.985Z</lastmod>
<lastmod>2025-04-16T17:51:39.885Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.attention.mllama.html</loc>
<lastmod>2025-04-16T05:17:53.237Z</lastmod>
<lastmod>2025-04-16T17:51:40.149Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.checks.html</loc>
<lastmod>2025-04-16T05:17:52.701Z</lastmod>
<lastmod>2025-04-16T17:51:39.586Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html</loc>
<lastmod>2025-04-16T05:17:53.229Z</lastmod>
<lastmod>2025-04-16T17:51:40.141Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html</loc>
<lastmod>2025-04-16T05:17:53.154Z</lastmod>
<lastmod>2025-04-16T17:51:40.063Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html</loc>
<lastmod>2025-04-16T05:17:52.847Z</lastmod>
<lastmod>2025-04-16T17:51:39.740Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html</loc>
<lastmod>2025-04-16T05:17:52.901Z</lastmod>
<lastmod>2025-04-16T17:51:39.796Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.args.html</loc>
<lastmod>2025-04-16T05:17:52.695Z</lastmod>
<lastmod>2025-04-16T17:51:39.579Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html</loc>
<lastmod>2025-04-16T05:17:52.914Z</lastmod>
<lastmod>2025-04-16T17:51:39.810Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.config.html</loc>
<lastmod>2025-04-16T05:17:53.395Z</lastmod>
<lastmod>2025-04-16T17:51:40.314Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html</loc>
<lastmod>2025-04-16T05:17:52.850Z</lastmod>
<lastmod>2025-04-16T17:51:39.743Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html</loc>
<lastmod>2025-04-16T05:17:52.616Z</lastmod>
<lastmod>2025-04-16T17:51:39.497Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html</loc>
<lastmod>2025-04-16T05:17:53.203Z</lastmod>
<lastmod>2025-04-16T17:51:40.114Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.base.html</loc>
<lastmod>2025-04-16T05:17:52.852Z</lastmod>
<lastmod>2025-04-16T17:51:39.745Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/rlhf.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/cli.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/unsloth.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/fsdp_qlora.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset_preprocessing.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/custom_integrations.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/mac.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/docker.html</loc>
<lastmod>2025-04-16T05:17:22.340Z</lastmod>
<lastmod>2025-04-16T17:51:04.408Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/ray-integration.html</loc>
<lastmod>2025-04-16T05:17:22.343Z</lastmod>
<lastmod>2025-04-16T17:51:04.411Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/index.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/conversation.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/pretraining.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html</loc>
<lastmod>2025-04-16T05:17:22.339Z</lastmod>
<lastmod>2025-04-16T17:51:04.407Z</lastmod>
</url>
</urlset>