Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-10-10 13:02:42 +00:00
parent 9436e959d9
commit 6fdb47fd4d
4 changed files with 297 additions and 292 deletions

View File

@@ -1 +1 @@
4f6c81ae
1b4fe270

View File

@@ -677,6 +677,10 @@ also follow the config field mapping below to update field names.</p>
<td>fsdp_use_orig_params</td>
<td><strong>REMOVED</strong></td>
</tr>
<tr class="odd">
<td>fsdp_activation_checkpointing</td>
<td>activation_checkpointing</td>
</tr>
</tbody>
</table>
<p>For more details, please see the migration guide in the <a href="https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md">torchtitan repo</a>. In Axolotl,
@@ -1321,98 +1325,99 @@ single sequence causes OOM errors during model training.</p>
<span id="cb6-88"><a href="#cb6-88" aria-hidden="true" tabindex="-1"></a>fsdp_cpu_ram_efficient_loading | cpu_ram_efficient_loading</span>
<span id="cb6-89"><a href="#cb6-89" aria-hidden="true" tabindex="-1"></a>fsdp_state_dict_type | state_dict_type</span>
<span id="cb6-90"><a href="#cb6-90" aria-hidden="true" tabindex="-1"></a>fsdp_use_orig_params | **REMOVED**</span>
<span id="cb6-91"><a href="#cb6-91" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-92"><a href="#cb6-92" aria-hidden="true" tabindex="-1"></a>For more details, please see the migration guide in the <span class="co">[</span><span class="ot">torchtitan repo</span><span class="co">](https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md)</span>. In Axolotl,</span>
<span id="cb6-93"><a href="#cb6-93" aria-hidden="true" tabindex="-1"></a>if you were using the following FSDP1 config:</span>
<span id="cb6-94"><a href="#cb6-94" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-95"><a href="#cb6-95" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-96"><a href="#cb6-96" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
<span id="cb6-97"><a href="#cb6-97" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-98"><a href="#cb6-98" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb6-99"><a href="#cb6-99" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-100"><a href="#cb6-100" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
<span id="cb6-101"><a href="#cb6-101" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
<span id="cb6-102"><a href="#cb6-102" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-103"><a href="#cb6-103" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span>
<span id="cb6-104"><a href="#cb6-104" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-105"><a href="#cb6-105" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-106"><a href="#cb6-106" aria-hidden="true" tabindex="-1"></a>You can migrate to the following FSDP2 config:</span>
<span id="cb6-107"><a href="#cb6-107" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-108"><a href="#cb6-108" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-109"><a href="#cb6-109" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
<span id="cb6-110"><a href="#cb6-110" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-111"><a href="#cb6-111" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb6-112"><a href="#cb6-112" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-113"><a href="#cb6-113" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
<span id="cb6-114"><a href="#cb6-114" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
<span id="cb6-115"><a href="#cb6-115" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-116"><a href="#cb6-116" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reshard_after_forward</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-117"><a href="#cb6-117" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-118"><a href="#cb6-118" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-119"><a href="#cb6-119" aria-hidden="true" tabindex="-1"></a><span class="fu">### FSDP1 (deprecated) {#sec-fsdp-config}</span></span>
<span id="cb6-120"><a href="#cb6-120" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-121"><a href="#cb6-121" aria-hidden="true" tabindex="-1"></a>::: {.callout-note}</span>
<span id="cb6-122"><a href="#cb6-122" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-123"><a href="#cb6-123" aria-hidden="true" tabindex="-1"></a>Using <span class="in">`fsdp`</span> to configure FSDP is deprecated and will be removed in an upcoming release of Axolotl. Please use <span class="in">`fsdp_config`</span> as above instead.</span>
<span id="cb6-124"><a href="#cb6-124" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-125"><a href="#cb6-125" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb6-126"><a href="#cb6-126" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-127"><a href="#cb6-127" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-128"><a href="#cb6-128" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
<span id="cb6-129"><a href="#cb6-129" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span>full_shard</span>
<span id="cb6-130"><a href="#cb6-130" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span>auto_wrap</span>
<span id="cb6-131"><a href="#cb6-131" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-132"><a href="#cb6-132" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-133"><a href="#cb6-133" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-134"><a href="#cb6-134" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> LlamaDecoderLayer</span></span>
<span id="cb6-135"><a href="#cb6-135" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-136"><a href="#cb6-136" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-91"><a href="#cb6-91" aria-hidden="true" tabindex="-1"></a>fsdp_activation_checkpointing | activation_checkpointing</span>
<span id="cb6-92"><a href="#cb6-92" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-93"><a href="#cb6-93" aria-hidden="true" tabindex="-1"></a>For more details, please see the migration guide in the <span class="co">[</span><span class="ot">torchtitan repo</span><span class="co">](https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md)</span>. In Axolotl,</span>
<span id="cb6-94"><a href="#cb6-94" aria-hidden="true" tabindex="-1"></a>if you were using the following FSDP1 config:</span>
<span id="cb6-95"><a href="#cb6-95" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-96"><a href="#cb6-96" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-97"><a href="#cb6-97" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
<span id="cb6-98"><a href="#cb6-98" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-99"><a href="#cb6-99" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb6-100"><a href="#cb6-100" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-101"><a href="#cb6-101" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
<span id="cb6-102"><a href="#cb6-102" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
<span id="cb6-103"><a href="#cb6-103" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-104"><a href="#cb6-104" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span>
<span id="cb6-105"><a href="#cb6-105" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-106"><a href="#cb6-106" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-107"><a href="#cb6-107" aria-hidden="true" tabindex="-1"></a>You can migrate to the following FSDP2 config:</span>
<span id="cb6-108"><a href="#cb6-108" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-109"><a href="#cb6-109" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-110"><a href="#cb6-110" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
<span id="cb6-111"><a href="#cb6-111" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-112"><a href="#cb6-112" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb6-113"><a href="#cb6-113" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-114"><a href="#cb6-114" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
<span id="cb6-115"><a href="#cb6-115" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
<span id="cb6-116"><a href="#cb6-116" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-117"><a href="#cb6-117" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reshard_after_forward</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-118"><a href="#cb6-118" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-119"><a href="#cb6-119" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-120"><a href="#cb6-120" aria-hidden="true" tabindex="-1"></a><span class="fu">### FSDP1 (deprecated) {#sec-fsdp-config}</span></span>
<span id="cb6-121"><a href="#cb6-121" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-122"><a href="#cb6-122" aria-hidden="true" tabindex="-1"></a>::: {.callout-note}</span>
<span id="cb6-123"><a href="#cb6-123" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-124"><a href="#cb6-124" aria-hidden="true" tabindex="-1"></a>Using <span class="in">`fsdp`</span> to configure FSDP is deprecated and will be removed in an upcoming release of Axolotl. Please use <span class="in">`fsdp_config`</span> as above instead.</span>
<span id="cb6-125"><a href="#cb6-125" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-126"><a href="#cb6-126" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb6-127"><a href="#cb6-127" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-128"><a href="#cb6-128" aria-hidden="true" tabindex="-1"></a><span class="in">```{.yaml}</span></span>
<span id="cb6-129"><a href="#cb6-129" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
<span id="cb6-130"><a href="#cb6-130" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span>full_shard</span>
<span id="cb6-131"><a href="#cb6-131" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span>auto_wrap</span>
<span id="cb6-132"><a href="#cb6-132" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb6-133"><a href="#cb6-133" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb6-134"><a href="#cb6-134" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
<span id="cb6-135"><a href="#cb6-135" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> LlamaDecoderLayer</span></span>
<span id="cb6-136"><a href="#cb6-136" aria-hidden="true" tabindex="-1"></a><span class="in">```</span></span>
<span id="cb6-137"><a href="#cb6-137" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-138"><a href="#cb6-138" aria-hidden="true" tabindex="-1"></a><span class="fu">## Sequence parallelism {#sec-sequence-parallelism}</span></span>
<span id="cb6-139"><a href="#cb6-139" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-140"><a href="#cb6-140" aria-hidden="true" tabindex="-1"></a>We support sequence parallelism (SP) via the</span>
<span id="cb6-141"><a href="#cb6-141" aria-hidden="true" tabindex="-1"></a><span class="co">[</span><span class="ot">ring-flash-attention</span><span class="co">](https://github.com/zhuzilin/ring-flash-attention)</span> project. This</span>
<span id="cb6-142"><a href="#cb6-142" aria-hidden="true" tabindex="-1"></a>allows one to split up sequences across GPUs, which is useful in the event that a</span>
<span id="cb6-143"><a href="#cb6-143" aria-hidden="true" tabindex="-1"></a>single sequence causes OOM errors during model training.</span>
<span id="cb6-144"><a href="#cb6-144" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-145"><a href="#cb6-145" aria-hidden="true" tabindex="-1"></a>See our <span class="co">[</span><span class="ot">dedicated guide</span><span class="co">](sequence_parallelism.qmd)</span> for more information.</span>
<span id="cb6-146"><a href="#cb6-146" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-147"><a href="#cb6-147" aria-hidden="true" tabindex="-1"></a><span class="fu">### FSDP + QLoRA {#sec-fsdp-qlora}</span></span>
<span id="cb6-148"><a href="#cb6-148" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-149"><a href="#cb6-149" aria-hidden="true" tabindex="-1"></a>For combining FSDP with QLoRA, see our <span class="co">[</span><span class="ot">dedicated guide</span><span class="co">](fsdp_qlora.qmd)</span>.</span>
<span id="cb6-150"><a href="#cb6-150" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-151"><a href="#cb6-151" aria-hidden="true" tabindex="-1"></a><span class="fu">## Performance Optimization {#sec-performance}</span></span>
<span id="cb6-152"><a href="#cb6-152" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-153"><a href="#cb6-153" aria-hidden="true" tabindex="-1"></a><span class="fu">### Liger Kernel Integration {#sec-liger}</span></span>
<span id="cb6-154"><a href="#cb6-154" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-155"><a href="#cb6-155" aria-hidden="true" tabindex="-1"></a>Please see <span class="co">[</span><span class="ot">docs</span><span class="co">](custom_integrations.qmd#liger)</span> for more info.</span>
<span id="cb6-156"><a href="#cb6-156" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-157"><a href="#cb6-157" aria-hidden="true" tabindex="-1"></a><span class="fu">## Troubleshooting {#sec-troubleshooting}</span></span>
<span id="cb6-158"><a href="#cb6-158" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-159"><a href="#cb6-159" aria-hidden="true" tabindex="-1"></a><span class="fu">### NCCL Issues {#sec-nccl}</span></span>
<span id="cb6-160"><a href="#cb6-160" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-161"><a href="#cb6-161" aria-hidden="true" tabindex="-1"></a>For NCCL-related problems, see our <span class="co">[</span><span class="ot">NCCL troubleshooting guide</span><span class="co">](nccl.qmd)</span>.</span>
<span id="cb6-162"><a href="#cb6-162" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-163"><a href="#cb6-163" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Problems {#sec-common-problems}</span></span>
<span id="cb6-164"><a href="#cb6-164" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-165"><a href="#cb6-165" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
<span id="cb6-166"><a href="#cb6-166" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-167"><a href="#cb6-167" aria-hidden="true" tabindex="-1"></a><span class="fu">## Memory Issues</span></span>
<span id="cb6-168"><a href="#cb6-168" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-169"><a href="#cb6-169" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce <span class="in">`micro_batch_size`</span></span>
<span id="cb6-170"><a href="#cb6-170" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce <span class="in">`eval_batch_size`</span></span>
<span id="cb6-171"><a href="#cb6-171" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Adjust <span class="in">`gradient_accumulation_steps`</span></span>
<span id="cb6-172"><a href="#cb6-172" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Consider using a higher ZeRO stage</span>
<span id="cb6-173"><a href="#cb6-173" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-174"><a href="#cb6-174" aria-hidden="true" tabindex="-1"></a><span class="fu">## Training Instability</span></span>
<span id="cb6-175"><a href="#cb6-175" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-176"><a href="#cb6-176" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Start with DeepSpeed ZeRO-2</span>
<span id="cb6-177"><a href="#cb6-177" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Monitor loss values</span>
<span id="cb6-178"><a href="#cb6-178" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check learning rates</span>
<span id="cb6-179"><a href="#cb6-179" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-180"><a href="#cb6-180" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb6-181"><a href="#cb6-181" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-182"><a href="#cb6-182" aria-hidden="true" tabindex="-1"></a>For more detailed troubleshooting, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></div>
<span id="cb6-138"><a href="#cb6-138" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-139"><a href="#cb6-139" aria-hidden="true" tabindex="-1"></a><span class="fu">## Sequence parallelism {#sec-sequence-parallelism}</span></span>
<span id="cb6-140"><a href="#cb6-140" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-141"><a href="#cb6-141" aria-hidden="true" tabindex="-1"></a>We support sequence parallelism (SP) via the</span>
<span id="cb6-142"><a href="#cb6-142" aria-hidden="true" tabindex="-1"></a><span class="co">[</span><span class="ot">ring-flash-attention</span><span class="co">](https://github.com/zhuzilin/ring-flash-attention)</span> project. This</span>
<span id="cb6-143"><a href="#cb6-143" aria-hidden="true" tabindex="-1"></a>allows one to split up sequences across GPUs, which is useful in the event that a</span>
<span id="cb6-144"><a href="#cb6-144" aria-hidden="true" tabindex="-1"></a>single sequence causes OOM errors during model training.</span>
<span id="cb6-145"><a href="#cb6-145" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-146"><a href="#cb6-146" aria-hidden="true" tabindex="-1"></a>See our <span class="co">[</span><span class="ot">dedicated guide</span><span class="co">](sequence_parallelism.qmd)</span> for more information.</span>
<span id="cb6-147"><a href="#cb6-147" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-148"><a href="#cb6-148" aria-hidden="true" tabindex="-1"></a><span class="fu">### FSDP + QLoRA {#sec-fsdp-qlora}</span></span>
<span id="cb6-149"><a href="#cb6-149" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-150"><a href="#cb6-150" aria-hidden="true" tabindex="-1"></a>For combining FSDP with QLoRA, see our <span class="co">[</span><span class="ot">dedicated guide</span><span class="co">](fsdp_qlora.qmd)</span>.</span>
<span id="cb6-151"><a href="#cb6-151" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-152"><a href="#cb6-152" aria-hidden="true" tabindex="-1"></a><span class="fu">## Performance Optimization {#sec-performance}</span></span>
<span id="cb6-153"><a href="#cb6-153" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-154"><a href="#cb6-154" aria-hidden="true" tabindex="-1"></a><span class="fu">### Liger Kernel Integration {#sec-liger}</span></span>
<span id="cb6-155"><a href="#cb6-155" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-156"><a href="#cb6-156" aria-hidden="true" tabindex="-1"></a>Please see <span class="co">[</span><span class="ot">docs</span><span class="co">](custom_integrations.qmd#liger)</span> for more info.</span>
<span id="cb6-157"><a href="#cb6-157" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-158"><a href="#cb6-158" aria-hidden="true" tabindex="-1"></a><span class="fu">## Troubleshooting {#sec-troubleshooting}</span></span>
<span id="cb6-159"><a href="#cb6-159" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-160"><a href="#cb6-160" aria-hidden="true" tabindex="-1"></a><span class="fu">### NCCL Issues {#sec-nccl}</span></span>
<span id="cb6-161"><a href="#cb6-161" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-162"><a href="#cb6-162" aria-hidden="true" tabindex="-1"></a>For NCCL-related problems, see our <span class="co">[</span><span class="ot">NCCL troubleshooting guide</span><span class="co">](nccl.qmd)</span>.</span>
<span id="cb6-163"><a href="#cb6-163" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-164"><a href="#cb6-164" aria-hidden="true" tabindex="-1"></a><span class="fu">### Common Problems {#sec-common-problems}</span></span>
<span id="cb6-165"><a href="#cb6-165" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-166"><a href="#cb6-166" aria-hidden="true" tabindex="-1"></a>::: {.panel-tabset}</span>
<span id="cb6-167"><a href="#cb6-167" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-168"><a href="#cb6-168" aria-hidden="true" tabindex="-1"></a><span class="fu">## Memory Issues</span></span>
<span id="cb6-169"><a href="#cb6-169" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-170"><a href="#cb6-170" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce <span class="in">`micro_batch_size`</span></span>
<span id="cb6-171"><a href="#cb6-171" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Reduce <span class="in">`eval_batch_size`</span></span>
<span id="cb6-172"><a href="#cb6-172" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Adjust <span class="in">`gradient_accumulation_steps`</span></span>
<span id="cb6-173"><a href="#cb6-173" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Consider using a higher ZeRO stage</span>
<span id="cb6-174"><a href="#cb6-174" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-175"><a href="#cb6-175" aria-hidden="true" tabindex="-1"></a><span class="fu">## Training Instability</span></span>
<span id="cb6-176"><a href="#cb6-176" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-177"><a href="#cb6-177" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Start with DeepSpeed ZeRO-2</span>
<span id="cb6-178"><a href="#cb6-178" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Monitor loss values</span>
<span id="cb6-179"><a href="#cb6-179" aria-hidden="true" tabindex="-1"></a><span class="ss">- </span>Check learning rates</span>
<span id="cb6-180"><a href="#cb6-180" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-181"><a href="#cb6-181" aria-hidden="true" tabindex="-1"></a>:::</span>
<span id="cb6-182"><a href="#cb6-182" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-183"><a href="#cb6-183" aria-hidden="true" tabindex="-1"></a>For more detailed troubleshooting, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></div>
</div></div></div></div></div>
</div> <!-- /content -->

View File

@@ -3757,7 +3757,7 @@
"href": "docs/multi-gpu.html#sec-fsdp",
"title": "Multi-GPU",
"section": "3 Fully Sharded Data Parallel (FSDP)",
"text": "3 Fully Sharded Data Parallel (FSDP)\n\n\n\n\n\n\nNote\n\n\n\nFSDP2 is recommended for new users. FSDP1 is deprecated and will be removed in an upcoming release of Axolotl.\n\n\n\n3.1 Migrating from FSDP1 to FSDP2\nTo migrate your config from FSDP1 to FSDP2, you must use the fsdp_version top-level config field to specify the FSDP version, and\nalso follow the config field mapping below to update field names.\n\n3.1.1 Config mapping\n\n\n\nFSDP1\nFSDP2\n\n\n\n\nfsdp_sharding_strategy\nreshard_after_forward\n\n\nfsdp_backward_prefetch_policy\nREMOVED\n\n\nfsdp_backward_prefetch\nREMOVED\n\n\nfsdp_forward_prefetch\nREMOVED\n\n\nfsdp_sync_module_states\nREMOVED\n\n\nfsdp_cpu_ram_efficient_loading\ncpu_ram_efficient_loading\n\n\nfsdp_state_dict_type\nstate_dict_type\n\n\nfsdp_use_orig_params\nREMOVED\n\n\n\nFor more details, please see the migration guide in the torchtitan repo. In Axolotl,\nif you were using the following FSDP1 config:\nfsdp_version: 1\nfsdp_config:\n fsdp_offload_params: false\n fsdp_cpu_ram_efficient_loading: true\n fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP\n fsdp_transformer_layer_cls_to_wrap: Qwen3DecoderLayer\n fsdp_state_dict_type: FULL_STATE_DICT\n fsdp_sharding_strategy: FULL_SHARD\nYou can migrate to the following FSDP2 config:\nfsdp_version: 2\nfsdp_config:\n offload_params: false\n cpu_ram_efficient_loading: true\n auto_wrap_policy: TRANSFORMER_BASED_WRAP\n transformer_layer_cls_to_wrap: Qwen3DecoderLayer\n state_dict_type: FULL_STATE_DICT\n reshard_after_forward: true\n\n\n\n3.2 FSDP1 (deprecated)\n\n\n\n\n\n\nNote\n\n\n\nUsing fsdp to configure FSDP is deprecated and will be removed in an upcoming release of Axolotl. Please use fsdp_config as above instead.\n\n\nfsdp:\n - full_shard\n - auto_wrap\nfsdp_config:\n fsdp_offload_params: true\n fsdp_state_dict_type: FULL_STATE_DICT\n fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer",
"text": "3 Fully Sharded Data Parallel (FSDP)\n\n\n\n\n\n\nNote\n\n\n\nFSDP2 is recommended for new users. FSDP1 is deprecated and will be removed in an upcoming release of Axolotl.\n\n\n\n3.1 Migrating from FSDP1 to FSDP2\nTo migrate your config from FSDP1 to FSDP2, you must use the fsdp_version top-level config field to specify the FSDP version, and\nalso follow the config field mapping below to update field names.\n\n3.1.1 Config mapping\n\n\n\nFSDP1\nFSDP2\n\n\n\n\nfsdp_sharding_strategy\nreshard_after_forward\n\n\nfsdp_backward_prefetch_policy\nREMOVED\n\n\nfsdp_backward_prefetch\nREMOVED\n\n\nfsdp_forward_prefetch\nREMOVED\n\n\nfsdp_sync_module_states\nREMOVED\n\n\nfsdp_cpu_ram_efficient_loading\ncpu_ram_efficient_loading\n\n\nfsdp_state_dict_type\nstate_dict_type\n\n\nfsdp_use_orig_params\nREMOVED\n\n\nfsdp_activation_checkpointing\nactivation_checkpointing\n\n\n\nFor more details, please see the migration guide in the torchtitan repo. In Axolotl,\nif you were using the following FSDP1 config:\nfsdp_version: 1\nfsdp_config:\n fsdp_offload_params: false\n fsdp_cpu_ram_efficient_loading: true\n fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP\n fsdp_transformer_layer_cls_to_wrap: Qwen3DecoderLayer\n fsdp_state_dict_type: FULL_STATE_DICT\n fsdp_sharding_strategy: FULL_SHARD\nYou can migrate to the following FSDP2 config:\nfsdp_version: 2\nfsdp_config:\n offload_params: false\n cpu_ram_efficient_loading: true\n auto_wrap_policy: TRANSFORMER_BASED_WRAP\n transformer_layer_cls_to_wrap: Qwen3DecoderLayer\n state_dict_type: FULL_STATE_DICT\n reshard_after_forward: true\n\n\n\n3.2 FSDP1 (deprecated)\n\n\n\n\n\n\nNote\n\n\n\nUsing fsdp to configure FSDP is deprecated and will be removed in an upcoming release of Axolotl. Please use fsdp_config as above instead.\n\n\nfsdp:\n - full_shard\n - auto_wrap\nfsdp_config:\n fsdp_offload_params: true\n fsdp_state_dict_type: FULL_STATE_DICT\n fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer",
"crumbs": [
"Deployments",
"Multi-GPU"

View File

@@ -2,798 +2,798 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
<lastmod>2025-10-09T18:18:54.738Z</lastmod>
<lastmod>2025-10-10T12:57:10.844Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/mac.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/cli.html</loc>
<lastmod>2025-10-09T18:18:54.711Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/nccl.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/getting-started.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/lr_groups.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/qat.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multipack.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/streaming.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.823Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/lora_optims.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/amd_hpc.html</loc>
<lastmod>2025-10-09T18:18:54.711Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/debugging.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/conversation.html</loc>
<lastmod>2025-10-09T18:18:54.711Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/index.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/config-reference.html</loc>
<lastmod>2025-10-09T18:22:17.616Z</lastmod>
<lastmod>2025-10-10T13:00:50.711Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multimodal.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/ray-integration.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/faq.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset_preprocessing.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/torchao.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.823Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/optimizers.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schedulers.html</loc>
<lastmod>2025-10-09T18:22:02.135Z</lastmod>
<lastmod>2025-10-10T13:00:34.956Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html</loc>
<lastmod>2025-10-09T18:22:01.340Z</lastmod>
<lastmod>2025-10-10T13:00:34.170Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/datasets.html</loc>
<lastmod>2025-10-09T18:22:00.935Z</lastmod>
<lastmod>2025-10-10T13:00:33.767Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.tokenization.html</loc>
<lastmod>2025-10-09T18:22:02.052Z</lastmod>
<lastmod>2025-10-10T13:00:34.873Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.tokenizer.html</loc>
<lastmod>2025-10-09T18:22:01.458Z</lastmod>
<lastmod>2025-10-10T13:00:34.287Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html</loc>
<lastmod>2025-10-09T18:22:01.925Z</lastmod>
<lastmod>2025-10-10T13:00:34.747Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html</loc>
<lastmod>2025-10-09T18:22:02.011Z</lastmod>
<lastmod>2025-10-10T13:00:34.833Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.data.sft.html</loc>
<lastmod>2025-10-09T18:22:02.186Z</lastmod>
<lastmod>2025-10-10T13:00:35.007Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html</loc>
<lastmod>2025-10-09T18:22:01.992Z</lastmod>
<lastmod>2025-10-10T13:00:34.814Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.patch_manager.html</loc>
<lastmod>2025-10-09T18:22:01.479Z</lastmod>
<lastmod>2025-10-10T13:00:34.308Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.liger.args.html</loc>
<lastmod>2025-10-09T18:22:02.506Z</lastmod>
<lastmod>2025-10-10T13:00:35.323Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.peft.html</loc>
<lastmod>2025-10-09T18:22:02.275Z</lastmod>
<lastmod>2025-10-10T13:00:35.095Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html</loc>
<lastmod>2025-10-09T18:22:01.673Z</lastmod>
<lastmod>2025-10-10T13:00:34.499Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html</loc>
<lastmod>2025-10-09T18:22:01.591Z</lastmod>
<lastmod>2025-10-10T13:00:34.418Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.base.html</loc>
<lastmod>2025-10-09T18:22:01.296Z</lastmod>
<lastmod>2025-10-10T13:00:34.127Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html</loc>
<lastmod>2025-10-09T18:22:02.043Z</lastmod>
<lastmod>2025-10-10T13:00:34.865Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.swiglu.html</loc>
<lastmod>2025-10-09T18:22:01.895Z</lastmod>
<lastmod>2025-10-10T13:00:34.718Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html</loc>
<lastmod>2025-10-09T18:22:02.491Z</lastmod>
<lastmod>2025-10-10T13:00:35.308Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html</loc>
<lastmod>2025-10-09T18:22:01.733Z</lastmod>
<lastmod>2025-10-10T13:00:34.559Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.utils.html</loc>
<lastmod>2025-10-09T18:22:01.969Z</lastmod>
<lastmod>2025-10-10T13:00:34.792Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.builders.rl.html</loc>
<lastmod>2025-10-09T18:22:01.033Z</lastmod>
<lastmod>2025-10-10T13:00:33.865Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.processor.html</loc>
<lastmod>2025-10-09T18:22:01.460Z</lastmod>
<lastmod>2025-10-10T13:00:34.289Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html</loc>
<lastmod>2025-10-09T18:22:02.637Z</lastmod>
<lastmod>2025-10-10T13:00:35.452Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.training_args.html</loc>
<lastmod>2025-10-09T18:22:01.049Z</lastmod>
<lastmod>2025-10-10T13:00:33.881Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.adapter.html</loc>
<lastmod>2025-10-09T18:22:01.467Z</lastmod>
<lastmod>2025-10-10T13:00:34.296Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html</loc>
<lastmod>2025-10-09T18:22:01.267Z</lastmod>
<lastmod>2025-10-10T13:00:34.098Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.train.html</loc>
<lastmod>2025-10-09T18:22:01.151Z</lastmod>
<lastmod>2025-10-10T13:00:33.982Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html</loc>
<lastmod>2025-10-09T18:22:01.492Z</lastmod>
<lastmod>2025-10-10T13:00:34.321Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html</loc>
<lastmod>2025-10-09T18:22:01.638Z</lastmod>
<lastmod>2025-10-10T13:00:34.465Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html</loc>
<lastmod>2025-10-09T18:22:01.651Z</lastmod>
<lastmod>2025-10-10T13:00:34.478Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html</loc>
<lastmod>2025-10-09T18:22:01.960Z</lastmod>
<lastmod>2025-10-10T13:00:34.782Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html</loc>
<lastmod>2025-10-09T18:22:01.678Z</lastmod>
<lastmod>2025-10-10T13:00:34.504Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html</loc>
<lastmod>2025-10-09T18:22:01.615Z</lastmod>
<lastmod>2025-10-10T13:00:34.442Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.messages.html</loc>
<lastmod>2025-10-09T18:22:01.078Z</lastmod>
<lastmod>2025-10-10T13:00:33.909Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html</loc>
<lastmod>2025-10-09T18:22:01.500Z</lastmod>
<lastmod>2025-10-10T13:00:34.329Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html</loc>
<lastmod>2025-10-09T18:22:01.709Z</lastmod>
<lastmod>2025-10-10T13:00:34.535Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html</loc>
<lastmod>2025-10-09T18:22:01.721Z</lastmod>
<lastmod>2025-10-10T13:00:34.547Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html</loc>
<lastmod>2025-10-09T18:22:02.301Z</lastmod>
<lastmod>2025-10-10T13:00:35.121Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/convert.html</loc>
<lastmod>2025-10-09T18:22:00.952Z</lastmod>
<lastmod>2025-10-10T13:00:33.784Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html</loc>
<lastmod>2025-10-09T18:22:01.711Z</lastmod>
<lastmod>2025-10-10T13:00:34.537Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.config.html</loc>
<lastmod>2025-10-09T18:22:02.225Z</lastmod>
<lastmod>2025-10-10T13:00:35.046Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.enums.html</loc>
<lastmod>2025-10-09T18:22:02.311Z</lastmod>
<lastmod>2025-10-10T13:00:35.132Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html</loc>
<lastmod>2025-10-09T18:22:01.971Z</lastmod>
<lastmod>2025-10-10T13:00:34.793Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html</loc>
<lastmod>2025-10-09T18:22:01.680Z</lastmod>
<lastmod>2025-10-10T13:00:34.506Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html</loc>
<lastmod>2025-10-09T18:22:01.419Z</lastmod>
<lastmod>2025-10-10T13:00:34.248Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html</loc>
<lastmod>2025-10-09T18:22:02.510Z</lastmod>
<lastmod>2025-10-10T13:00:35.327Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.core.html</loc>
<lastmod>2025-10-09T18:22:02.539Z</lastmod>
<lastmod>2025-10-10T13:00:35.356Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.shared.html</loc>
<lastmod>2025-10-09T18:22:01.083Z</lastmod>
<lastmod>2025-10-10T13:00:33.915Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html</loc>
<lastmod>2025-10-09T18:22:01.759Z</lastmod>
<lastmod>2025-10-10T13:00:34.584Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html</loc>
<lastmod>2025-10-09T18:22:02.622Z</lastmod>
<lastmod>2025-10-10T13:00:35.438Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html</loc>
<lastmod>2025-10-09T18:22:02.654Z</lastmod>
<lastmod>2025-10-10T13:00:35.469Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html</loc>
<lastmod>2025-10-09T18:22:01.572Z</lastmod>
<lastmod>2025-10-10T13:00:34.400Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html</loc>
<lastmod>2025-10-09T18:22:02.286Z</lastmod>
<lastmod>2025-10-10T13:00:35.106Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html</loc>
<lastmod>2025-10-09T18:22:02.646Z</lastmod>
<lastmod>2025-10-10T13:00:35.461Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.base.html</loc>
<lastmod>2025-10-09T18:22:01.531Z</lastmod>
<lastmod>2025-10-10T13:00:34.360Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.utils.html</loc>
<lastmod>2025-10-09T18:22:01.906Z</lastmod>
<lastmod>2025-10-10T13:00:34.729Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.merge_lora.html</loc>
<lastmod>2025-10-09T18:22:01.253Z</lastmod>
<lastmod>2025-10-10T13:00:34.084Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.html</loc>
<lastmod>2025-10-09T18:22:01.306Z</lastmod>
<lastmod>2025-10-10T13:00:34.136Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html</loc>
<lastmod>2025-10-09T18:22:01.529Z</lastmod>
<lastmod>2025-10-10T13:00:34.358Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/index.html</loc>
<lastmod>2025-10-09T18:22:00.837Z</lastmod>
<lastmod>2025-10-10T13:00:33.670Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html</loc>
<lastmod>2025-10-09T18:22:01.693Z</lastmod>
<lastmod>2025-10-10T13:00:34.519Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html</loc>
<lastmod>2025-10-09T18:22:02.007Z</lastmod>
<lastmod>2025-10-10T13:00:34.829Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html</loc>
<lastmod>2025-10-09T18:22:01.664Z</lastmod>
<lastmod>2025-10-10T13:00:34.491Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html</loc>
<lastmod>2025-10-09T18:22:01.433Z</lastmod>
<lastmod>2025-10-10T13:00:34.263Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.lora.html</loc>
<lastmod>2025-10-09T18:22:02.060Z</lastmod>
<lastmod>2025-10-10T13:00:34.881Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html</loc>
<lastmod>2025-10-09T18:22:01.488Z</lastmod>
<lastmod>2025-10-10T13:00:34.317Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.config.html</loc>
<lastmod>2025-10-09T18:22:01.219Z</lastmod>
<lastmod>2025-10-10T13:00:34.051Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html</loc>
<lastmod>2025-10-09T18:22:01.919Z</lastmod>
<lastmod>2025-10-10T13:00:34.741Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.batching.html</loc>
<lastmod>2025-10-09T18:22:02.563Z</lastmod>
<lastmod>2025-10-10T13:00:35.379Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.quantization.html</loc>
<lastmod>2025-10-09T18:22:02.210Z</lastmod>
<lastmod>2025-10-10T13:00:35.030Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.dict.html</loc>
<lastmod>2025-10-09T18:22:02.167Z</lastmod>
<lastmod>2025-10-10T13:00:34.988Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.quantize.html</loc>
<lastmod>2025-10-09T18:22:01.904Z</lastmod>
<lastmod>2025-10-10T13:00:34.727Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.training.html</loc>
<lastmod>2025-10-09T18:22:02.242Z</lastmod>
<lastmod>2025-10-10T13:00:35.063Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/train.html</loc>
<lastmod>2025-10-09T18:22:00.915Z</lastmod>
<lastmod>2025-10-10T13:00:33.747Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html</loc>
<lastmod>2025-10-09T18:22:01.099Z</lastmod>
<lastmod>2025-10-10T13:00:33.930Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/inference.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/FAQS.html</loc>
<lastmod>2025-10-09T18:18:54.710Z</lastmod>
<lastmod>2025-10-10T12:57:10.816Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html</loc>
<lastmod>2025-10-09T18:18:54.720Z</lastmod>
<lastmod>2025-10-10T12:57:10.827Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/index.html</loc>
<lastmod>2025-10-09T18:18:54.733Z</lastmod>
<lastmod>2025-10-10T12:57:10.839Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/custom_integrations.html</loc>
<lastmod>2025-10-09T18:18:54.711Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.utils.html</loc>
<lastmod>2025-10-09T18:22:02.318Z</lastmod>
<lastmod>2025-10-10T13:00:35.138Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.geglu.html</loc>
<lastmod>2025-10-09T18:22:01.883Z</lastmod>
<lastmod>2025-10-10T13:00:34.706Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.builders.causal.html</loc>
<lastmod>2025-10-09T18:22:01.028Z</lastmod>
<lastmod>2025-10-10T13:00:33.860Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mamba.html</loc>
<lastmod>2025-10-09T18:22:01.397Z</lastmod>
<lastmod>2025-10-10T13:00:34.227Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html</loc>
<lastmod>2025-10-09T18:22:01.764Z</lastmod>
<lastmod>2025-10-10T13:00:34.589Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.datasets.chat.html</loc>
<lastmod>2025-10-09T18:22:01.089Z</lastmod>
<lastmod>2025-10-10T13:00:33.921Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html</loc>
<lastmod>2025-10-09T18:22:02.573Z</lastmod>
<lastmod>2025-10-10T13:00:35.389Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html</loc>
<lastmod>2025-10-09T18:22:01.631Z</lastmod>
<lastmod>2025-10-10T13:00:34.458Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.const.html</loc>
<lastmod>2025-10-09T18:22:02.518Z</lastmod>
<lastmod>2025-10-10T13:00:35.334Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.quantize.html</loc>
<lastmod>2025-10-09T18:22:01.283Z</lastmod>
<lastmod>2025-10-10T13:00:34.114Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.trainer.html</loc>
<lastmod>2025-10-09T18:22:02.102Z</lastmod>
<lastmod>2025-10-10T13:00:34.923Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html</loc>
<lastmod>2025-10-09T18:22:01.225Z</lastmod>
<lastmod>2025-10-10T13:00:34.057Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/evaluate.html</loc>
<lastmod>2025-10-09T18:22:00.928Z</lastmod>
<lastmod>2025-10-10T13:00:33.760Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html</loc>
<lastmod>2025-10-09T18:22:01.917Z</lastmod>
<lastmod>2025-10-10T13:00:34.739Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.model.html</loc>
<lastmod>2025-10-09T18:22:01.448Z</lastmod>
<lastmod>2025-10-10T13:00:34.277Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.distributed.html</loc>
<lastmod>2025-10-09T18:22:02.160Z</lastmod>
<lastmod>2025-10-10T13:00:34.981Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html</loc>
<lastmod>2025-10-09T18:22:02.067Z</lastmod>
<lastmod>2025-10-10T13:00:34.888Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/kernels.lora.html</loc>
<lastmod>2025-10-09T18:22:01.870Z</lastmod>
<lastmod>2025-10-10T13:00:34.693Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.main.html</loc>
<lastmod>2025-10-09T18:22:01.141Z</lastmod>
<lastmod>2025-10-10T13:00:33.972Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html</loc>
<lastmod>2025-10-09T18:22:02.514Z</lastmod>
<lastmod>2025-10-10T13:00:35.331Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html</loc>
<lastmod>2025-10-09T18:22:02.176Z</lastmod>
<lastmod>2025-10-10T13:00:34.997Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html</loc>
<lastmod>2025-10-09T18:22:01.304Z</lastmod>
<lastmod>2025-10-10T13:00:34.134Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html</loc>
<lastmod>2025-10-09T18:22:01.913Z</lastmod>
<lastmod>2025-10-10T13:00:34.736Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.builders.base.html</loc>
<lastmod>2025-10-09T18:22:01.022Z</lastmod>
<lastmod>2025-10-10T13:00:33.854Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.trl.html</loc>
<lastmod>2025-10-09T18:22:02.279Z</lastmod>
<lastmod>2025-10-10T13:00:35.099Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.args.html</loc>
<lastmod>2025-10-09T18:22:01.320Z</lastmod>
<lastmod>2025-10-10T13:00:34.150Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.base.html</loc>
<lastmod>2025-10-09T18:22:01.372Z</lastmod>
<lastmod>2025-10-10T13:00:34.202Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html</loc>
<lastmod>2025-10-09T18:22:01.973Z</lastmod>
<lastmod>2025-10-10T13:00:34.795Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html</loc>
<lastmod>2025-10-09T18:22:01.915Z</lastmod>
<lastmod>2025-10-10T13:00:34.738Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.model.html</loc>
<lastmod>2025-10-09T18:22:02.234Z</lastmod>
<lastmod>2025-10-10T13:00:35.054Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html</loc>
<lastmod>2025-10-09T18:22:01.732Z</lastmod>
<lastmod>2025-10-10T13:00:34.557Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html</loc>
<lastmod>2025-10-09T18:22:02.641Z</lastmod>
<lastmod>2025-10-10T13:00:35.457Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.datasets.html</loc>
<lastmod>2025-10-09T18:22:02.536Z</lastmod>
<lastmod>2025-10-10T13:00:35.353Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html</loc>
<lastmod>2025-10-09T18:22:02.264Z</lastmod>
<lastmod>2025-10-10T13:00:35.085Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.fetch.html</loc>
<lastmod>2025-10-09T18:22:01.326Z</lastmod>
<lastmod>2025-10-10T13:00:34.157Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html</loc>
<lastmod>2025-10-09T18:22:01.706Z</lastmod>
<lastmod>2025-10-10T13:00:34.532Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.relora.html</loc>
<lastmod>2025-10-09T18:22:01.923Z</lastmod>
<lastmod>2025-10-10T13:00:34.746Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.evaluate.html</loc>
<lastmod>2025-10-09T18:22:01.162Z</lastmod>
<lastmod>2025-10-10T13:00:33.992Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html</loc>
<lastmod>2025-10-09T18:22:01.707Z</lastmod>
<lastmod>2025-10-10T13:00:34.534Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.utils.html</loc>
<lastmod>2025-10-09T18:22:01.435Z</lastmod>
<lastmod>2025-10-10T13:00:34.265Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html</loc>
<lastmod>2025-10-09T18:22:01.605Z</lastmod>
<lastmod>2025-10-10T13:00:34.433Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.chat_templates.html</loc>
<lastmod>2025-10-09T18:22:02.054Z</lastmod>
<lastmod>2025-10-10T13:00:34.875Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.data.streaming.html</loc>
<lastmod>2025-10-09T18:22:02.178Z</lastmod>
<lastmod>2025-10-10T13:00:34.999Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.bench.html</loc>
<lastmod>2025-10-09T18:22:02.071Z</lastmod>
<lastmod>2025-10-10T13:00:34.893Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/common.architectures.html</loc>
<lastmod>2025-10-09T18:22:02.516Z</lastmod>
<lastmod>2025-10-10T13:00:35.333Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.checks.html</loc>
<lastmod>2025-10-09T18:22:01.198Z</lastmod>
<lastmod>2025-10-10T13:00:34.029Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html</loc>
<lastmod>2025-10-09T18:22:01.405Z</lastmod>
<lastmod>2025-10-10T13:00:34.235Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.base.html</loc>
<lastmod>2025-10-09T18:22:02.487Z</lastmod>
<lastmod>2025-10-10T13:00:35.304Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.train.html</loc>
<lastmod>2025-10-09T18:22:01.355Z</lastmod>
<lastmod>2025-10-10T13:00:34.185Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mamba.html</loc>
<lastmod>2025-10-09T18:22:02.567Z</lastmod>
<lastmod>2025-10-10T13:00:35.383Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.art.html</loc>
<lastmod>2025-10-09T18:22:01.190Z</lastmod>
<lastmod>2025-10-10T13:00:34.021Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html</loc>
<lastmod>2025-10-09T18:22:01.984Z</lastmod>
<lastmod>2025-10-10T13:00:34.806Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/logging_config.html</loc>
<lastmod>2025-10-09T18:22:01.015Z</lastmod>
<lastmod>2025-10-10T13:00:33.846Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.freeze.html</loc>
<lastmod>2025-10-09T18:22:02.081Z</lastmod>
<lastmod>2025-10-10T13:00:34.902Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html</loc>
<lastmod>2025-10-09T18:22:01.660Z</lastmod>
<lastmod>2025-10-10T13:00:34.486Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html</loc>
<lastmod>2025-10-09T18:22:01.589Z</lastmod>
<lastmod>2025-10-10T13:00:34.416Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html</loc>
<lastmod>2025-10-09T18:22:01.980Z</lastmod>
<lastmod>2025-10-10T13:00:34.802Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html</loc>
<lastmod>2025-10-09T18:22:02.537Z</lastmod>
<lastmod>2025-10-10T13:00:35.354Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.trainers.trl.html</loc>
<lastmod>2025-10-09T18:22:01.390Z</lastmod>
<lastmod>2025-10-10T13:00:34.220Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html</loc>
<lastmod>2025-10-09T18:22:01.646Z</lastmod>
<lastmod>2025-10-10T13:00:34.472Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/loaders.constants.html</loc>
<lastmod>2025-10-09T18:22:01.481Z</lastmod>
<lastmod>2025-10-10T13:00:34.310Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html</loc>
<lastmod>2025-10-09T18:22:02.005Z</lastmod>
<lastmod>2025-10-10T13:00:34.827Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.vllm_serve.html</loc>
<lastmod>2025-10-09T18:22:01.292Z</lastmod>
<lastmod>2025-10-10T13:00:34.123Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/prompt_tokenizers.html</loc>
<lastmod>2025-10-09T18:22:01.003Z</lastmod>
<lastmod>2025-10-10T13:00:33.835Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.args.html</loc>
<lastmod>2025-10-09T18:22:01.186Z</lastmod>
<lastmod>2025-10-10T13:00:34.017Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.inference.html</loc>
<lastmod>2025-10-09T18:22:01.243Z</lastmod>
<lastmod>2025-10-10T13:00:34.074Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.utils.load.html</loc>
<lastmod>2025-10-09T18:22:01.333Z</lastmod>
<lastmod>2025-10-10T13:00:34.163Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/cli.preprocess.html</loc>
<lastmod>2025-10-09T18:22:01.277Z</lastmod>
<lastmod>2025-10-10T13:00:34.108Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html</loc>
<lastmod>2025-10-09T18:22:02.635Z</lastmod>
<lastmod>2025-10-10T13:00:35.451Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html</loc>
<lastmod>2025-10-09T18:22:02.630Z</lastmod>
<lastmod>2025-10-10T13:00:35.446Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html</loc>
<lastmod>2025-10-09T18:22:01.079Z</lastmod>
<lastmod>2025-10-10T13:00:33.911Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html</loc>
<lastmod>2025-10-09T18:22:02.492Z</lastmod>
<lastmod>2025-10-10T13:00:35.309Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html</loc>
<lastmod>2025-10-09T18:22:02.502Z</lastmod>
<lastmod>2025-10-10T13:00:35.319Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html</loc>
<lastmod>2025-10-09T18:22:01.994Z</lastmod>
<lastmod>2025-10-10T13:00:34.816Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html</loc>
<lastmod>2025-10-09T18:22:01.081Z</lastmod>
<lastmod>2025-10-10T13:00:33.913Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/reward_modelling.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/quantize.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/fsdp_qlora.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/nd_parallelism.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/batch_vs_grad.html</loc>
<lastmod>2025-10-09T18:18:54.711Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multi-node.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/rlhf.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/pretraining.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/tokenized.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset-formats/template_free.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.818Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/multi-gpu.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/input_output.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/docker.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/gradient_checkpointing.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/optimizations.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/sequence_parallelism.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.823Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/dataset_loading.html</loc>
<lastmod>2025-10-09T18:18:54.712Z</lastmod>
<lastmod>2025-10-10T12:57:10.819Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/installation.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/mixed_precision.html</loc>
<lastmod>2025-10-09T18:18:54.715Z</lastmod>
<lastmod>2025-10-10T12:57:10.822Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/docs/unsloth.html</loc>
<lastmod>2025-10-09T18:18:54.716Z</lastmod>
<lastmod>2025-10-10T12:57:10.823Z</lastmod>
</url>
<url>
<loc>https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html</loc>
<lastmod>2025-10-09T18:18:54.737Z</lastmod>
<lastmod>2025-10-10T12:57:10.844Z</lastmod>
</url>
</urlset>