From a3cdeab27e87306bc8262dc6001db5872d95c6cb Mon Sep 17 00:00:00 2001
From: Quarto GHA Workflow Runner
Date: Thu, 19 Feb 2026 23:34:25 +0000
Subject: [PATCH] Built site for gh-pages

---
 .nojekyll                     |    2 +-
 docs/cli.html                 |   16 +-
 docs/custom_integrations.html | 1007 ++++++++++++++++++---------------
 search.json                   |   15 +-
 sitemap.xml                   |  472 +++++++--------
 5 files changed, 804 insertions(+), 708 deletions(-)

diff --git a/.nojekyll b/.nojekyll
index 6b0c7b8ae..9c1fda326 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-eac6727e
\ No newline at end of file
+8763ebce
\ No newline at end of file

diff --git a/docs/cli.html b/docs/cli.html
index 6e0983219..87f9fb3fa 100644
--- a/docs/cli.html
+++ b/docs/cli.html
@@ -944,13 +944,15 @@ the CLI commands, their usage, and common examples.

# Basic evaluation
 axolotl lm-eval config.yml

Configuration options:

-
# List of tasks to evaluate
-lm_eval_tasks:
-  - arc_challenge
-  - hellaswag
-lm_eval_batch_size: # Batch size for evaluation
-output_dir: # Directory to save evaluation results
-

See LM Eval Harness for more details.

+
lm_eval_model: # model to evaluate (local or hf path)
+
+# List of tasks to evaluate
+lm_eval_tasks:
+  - arc_challenge
+  - hellaswag
+lm_eval_batch_size: # Batch size for evaluation
+output_dir: # Directory to save evaluation results
+

See LM Eval Harness integration docs for full configuration details.

delinearize-llama4

diff --git a/docs/custom_integrations.html b/docs/custom_integrations.html
index 78f1e15df..2cad39191 100644
--- a/docs/custom_integrations.html
+++ b/docs/custom_integrations.html
@@ -778,14 +778,21 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • Usage
  • Citation
  • -
  • Knowledge Distillation (KD)
  • +
  • Kernels Integration
  • +
  • Knowledge Distillation (KD)
  • +
  • LLMCompressor
  • Language Model Evaluation Harness (LM Eval)
  • Liger Kernels
  • Spectrum
  • SwanLab Integration for Axolotl
@@ -839,7 +849,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
  • Example: DPO Training with Completion Logging
  • Example: Disable Completion Logging
  • Supported RLHF Trainers
  • -
  • How It Works
  • +
  • How It Works
  • Viewing Completion Tables
  • Memory Management
  • Performance Impact
@@ -1200,25 +1210,63 @@ The quick brown fox jumps over the loud dog

    Please see reference here

    -
    -

    Knowledge Distillation (KD)

    +
    +

    Kernels Integration

    +

    MoE (Mixture of Experts) kernels speed up training for MoE layers and reduce VRAM costs. In transformers v5, batched_mm and grouped_mm were integrated as built-in options via the experts_implementation config kwarg:

    +
    class ExpertsInterface(GeneralInterface):
    +    _global_mapping = {
    +        "batched_mm": batched_mm_experts_forward,
    +        "grouped_mm": grouped_mm_experts_forward,
    +    }
    +
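
    For illustration, here is a minimal sketch of opting into one of these built-ins when loading a model. It assumes transformers v5 exposes experts_implementation as a config kwarg as described above; the exact plumbing may vary by release, and the Mixtral repo id is just an example:

    from transformers import AutoConfig, AutoModelForCausalLM

    # Pick the built-in grouped_mm experts forward for a SparseMoeBlock model
    config = AutoConfig.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
    config.experts_implementation = "grouped_mm"  # or "batched_mm"
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mixtral-8x7B-v0.1", config=config
    )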

    In our custom integration, we add support for ScatterMoE, which is even faster and more memory-efficient than grouped_mm.

    Usage

    -
    plugins:
    -  - "axolotl.integrations.kd.KDPlugin"
    -
    -kd_trainer: True
    -kd_ce_alpha: 0.1
    -kd_alpha: 0.9
    -kd_temperature: 1.0
    -
    -torch_compile: True  # torch>=2.6.0, recommended to reduce vram
    -
    -datasets:
    -  - path: ...
    -    type: "axolotl.integrations.kd.chat_template"
    -    field_messages: "messages_combined"
    -    logprobs_field: "llm_text_generation_vllm_logprobs"  # for kd only, field of logprobs
    +

    Add the following to your axolotl YAML config:

    +
    plugins:
    +  - axolotl.integrations.kernels.KernelsPlugin
    +
    +use_kernels: true
    +use_scattermoe: true
    +

    Important: Setting experts_implementation is incompatible with use_scattermoe.

    +
    +
    +

    How It Works

    +

    The KernelsPlugin runs before model loading and:

    +
      +
    1. Registers the ScatterMoE kernel from the axolotl-ai-co/scattermoe Hub repo.
    +
    2. Patches the model’s SparseMoeBlock forward method with the optimized ScatterMoE implementation.
    +
    +

    This works for any MoE model in transformers that uses a SparseMoeBlock class (Mixtral, Qwen2-MoE, OLMoE, etc.).
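
    A minimal sketch of this flow, assuming the Hugging Face kernels package; scattermoe.forward is a hypothetical entry point here, and the plugin’s actual internals may differ:

    from kernels import get_kernel
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
    scattermoe = get_kernel("axolotl-ai-co/scattermoe")  # fetch the kernel from the Hub

    for module in model.modules():
        # swap the optimized forward into every SparseMoeBlock-style module
        if type(module).__name__.endswith("SparseMoeBlock"):
            module.forward = scattermoe.forward.__get__(module)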

    +
    +
    +

    Limitations

    +

    ScatterMoE uses softmax -> top-k routing, so results may differ from the baseline for some model architectures (GPT-OSS, GLM_MOE_DSA).
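
    To see why the routing order matters, compare the two gate computations on toy router logits (an illustrative torch snippet, not library code):

    import torch

    logits = torch.tensor([2.0, 1.5, 0.5, -1.0])  # router logits for 4 experts
    k = 2

    # softmax -> topk (ScatterMoE-style): normalize over all experts first
    weights_a, experts_a = torch.softmax(logits, dim=-1).topk(k)

    # topk -> softmax (some baselines): normalize over the selected experts only
    top_logits, experts_b = logits.topk(k)
    weights_b = torch.softmax(top_logits, dim=-1)

    print(weights_a)  # ~[0.53, 0.32]: gates do not sum to 1
    print(weights_b)  # ~[0.62, 0.38]: gates sum to 1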

    +
    +
    +

    Note on MegaBlocks

    +

    We tested MegaBlocks but were unable to ensure numerical accuracy, so we did not integrate it. It was also incompatible with many newer model architectures in transformers.

    +

    Please see reference here

    +
    +
    +
    +

    Knowledge Distillation (KD)

    +
    +

    Usage

    +
    plugins:
    +  - "axolotl.integrations.kd.KDPlugin"
    +
    +kd_trainer: True
    +kd_ce_alpha: 0.1
    +kd_alpha: 0.9
    +kd_temperature: 1.0
    +
    +torch_compile: True  # torch>=2.6.0, recommended to reduce vram
    +
    +datasets:
    +  - path: ...
    +    type: "axolotl.integrations.kd.chat_template"
    +    field_messages: "messages_combined"
    +    logprobs_field: "llm_text_generation_vllm_logprobs"  # for kd only, field of logprobs

    An example dataset can be found at axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample
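
    The kd_ce_alpha, kd_alpha, and kd_temperature knobs map onto the standard distillation loss, which mixes cross-entropy against the hard labels with a temperature-scaled KL term against the teacher. A generic sketch of that loss, not Axolotl’s exact implementation:

    import torch.nn.functional as F

    def kd_loss(student_logits, teacher_logits, labels,
                ce_alpha=0.1, kd_alpha=0.9, temperature=1.0):
        # hard-label cross-entropy term, weighted by kd_ce_alpha
        ce = F.cross_entropy(student_logits, labels)
        # soft-label KL term at temperature T, weighted by kd_alpha
        kl = F.kl_div(
            F.log_softmax(student_logits / temperature, dim=-1),
            F.log_softmax(teacher_logits / temperature, dim=-1),
            log_target=True,
            reduction="batchmean",
        ) * (temperature ** 2)
        return ce_alpha * ce + kd_alpha * kl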

    Please see reference here

    @@ -1233,34 +1281,34 @@ The quick brown fox jumps over the loud dog

    Requirements

    • Axolotl with llmcompressor extras:

      -
      pip install "axolotl[llmcompressor]"
    • +
      pip install "axolotl[llmcompressor]"
    • Requires llmcompressor >= 0.5.1

    This will install all necessary dependencies to fine-tune sparsified models using the integration.


    -
    -

    Usage

    +
    +

    Usage

    To enable sparse fine-tuning with this integration, include the plugin in your Axolotl config:

    -
    plugins:
    -  - axolotl.integrations.llm_compressor.LLMCompressorPlugin
    -
    -llmcompressor:
    -  recipe:
    -    finetuning_stage:
    -      finetuning_modifiers:
    -        ConstantPruningModifier:
    -          targets: [
    -            're:.*q_proj.weight',
    -            're:.*k_proj.weight',
    -            're:.*v_proj.weight',
    -            're:.*o_proj.weight',
    -            're:.*gate_proj.weight',
    -            're:.*up_proj.weight',
    -            're:.*down_proj.weight',
    -          ]
    -          start: 0
    -  save_compressed: true
    +
    plugins:
    +  - axolotl.integrations.llm_compressor.LLMCompressorPlugin
    +
    +llmcompressor:
    +  recipe:
    +    finetuning_stage:
    +      finetuning_modifiers:
    +        ConstantPruningModifier:
    +          targets: [
    +            're:.*q_proj.weight',
    +            're:.*k_proj.weight',
    +            're:.*v_proj.weight',
    +            're:.*o_proj.weight',
    +            're:.*gate_proj.weight',
    +            're:.*up_proj.weight',
    +            're:.*down_proj.weight',
    +          ]
    +          start: 0
    +  save_compressed: true

    This plugin does not apply pruning or sparsification itself — it is intended for fine-tuning models that have already been sparsified.

    Pre-sparsified checkpoints can be:
    - Generated using LLMCompressor
@@ -1287,22 +1335,22 @@ The quick brown fox jumps over the loud dog

    After fine-tuning your sparse model, you can leverage vLLM for efficient inference. You can also use LLMCompressor to apply additional quantization to your fine-tuned sparse model before inference for even greater performance benefits:

    -
    from vllm import LLM, SamplingParams
    -
    -prompts = [
    -    "Hello, my name is",
    -    "The president of the United States is",
    -    "The capital of France is",
    -    "The future of AI is",
    -]
    -sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
    -llm = LLM("path/to/your/sparse/model")
    -outputs = llm.generate(prompts, sampling_params)
    -
    -for output in outputs:
    -    prompt = output.prompt
    -    generated_text = output.outputs[0].text
    -    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
    +
    from vllm import LLM, SamplingParams
    +
    +prompts = [
    +    "Hello, my name is",
    +    "The president of the United States is",
    +    "The capital of France is",
    +    "The future of AI is",
    +]
    +sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
    +llm = LLM("path/to/your/sparse/model")
    +outputs = llm.generate(prompts, sampling_params)
    +
    +for output in outputs:
    +    prompt = output.prompt
    +    generated_text = output.outputs[0].text
    +    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

    For more details on vLLM’s capabilities and advanced configuration options, see the official vLLM documentation.

    @@ -1316,31 +1364,66 @@ sparse model before inference for even greater performance benefits.:

    Language Model Evaluation Harness (LM Eval)

    Run evaluation on a model using the popular lm-evaluation-harness library.

    See https://github.com/EleutherAI/lm-evaluation-harness

    -
    -

    Usage

    -
    plugins:
    -  - axolotl.integrations.lm_eval.LMEvalPlugin
    -
    -lm_eval_tasks:
    -  - gsm8k
    -  - hellaswag
    -  - arc_easy
    -
    -lm_eval_batch_size: # Batch size for evaluation
    -output_dir: # Directory to save evaluation results
    +
    +

    Usage

    +

    There are two ways to use the LM Eval integration:

    +
    +
    +

    1. Post-Training Evaluation

    +

    When training with the plugin enabled, evaluation runs automatically after training completes:

    +
    plugins:
    +  - axolotl.integrations.lm_eval.LMEvalPlugin
    +
    +lm_eval_tasks:
    +  - gsm8k
    +  - hellaswag
    +  - arc_easy
    +
    +lm_eval_batch_size: # Batch size for evaluation
    +
    +output_dir: # Directory to save evaluation results
    +

    Run training as usual:

    +
    axolotl train config.yml
    +
    +
    +

    2. Standalone CLI Evaluation

    +

    Evaluate any model directly without training:

    +
    lm_eval_model: meta-llama/Llama-2-7b-hf
    +
    +plugins:
    +  - axolotl.integrations.lm_eval.LMEvalPlugin
    +
    +lm_eval_tasks:
    +  - gsm8k
    +  - hellaswag
    +  - arc_easy
    +
    +lm_eval_batch_size: 8
    +output_dir: ./outputs
    +

    Run evaluation:

    +
    axolotl lm-eval config.yml
    +
    +
    +

    Model Selection Priority

    +

    The model to evaluate is selected in the following priority order:

    +
      +
    1. lm_eval_model - Explicit model path or HuggingFace repo (highest priority)
    +
    2. hub_model_id - Trained model pushed to HuggingFace Hub
    +
    3. output_dir - Local checkpoint directory containing trained model weights
    +
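
    In code terms, the selection is a simple fallback chain; a minimal sketch with a hypothetical helper name, not the plugin’s actual function:

    def resolve_eval_model(cfg: dict) -> str:
        # falls through lm_eval_model -> hub_model_id -> output_dir,
        # mirroring the priority order above
        return cfg.get("lm_eval_model") or cfg.get("hub_model_id") or cfg["output_dir"]

    resolve_eval_model({"output_dir": "./outputs"})  # -> "./outputs"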

    Citation

    -
    @misc{eval-harness,
    -  author       = {Gao, Leo and Tow, Jonathan and Abbasi, Baber and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and Le Noac'h, Alain and Li, Haonan and McDonell, Kyle and Muennighoff, Niklas and Ociepa, Chris and Phang, Jason and Reynolds, Laria and Schoelkopf, Hailey and Skowron, Aviya and Sutawika, Lintang and Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and Zou, Andy},
    -  title        = {A framework for few-shot language model evaluation},
    -  month        = 07,
    -  year         = 2024,
    -  publisher    = {Zenodo},
    -  version      = {v0.4.3},
    -  doi          = {10.5281/zenodo.12608602},
    -  url          = {https://zenodo.org/records/12608602}
    -}
    +
    @misc{eval-harness,
    +  author       = {Gao, Leo and Tow, Jonathan and Abbasi, Baber and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and Le Noac'h, Alain and Li, Haonan and McDonell, Kyle and Muennighoff, Niklas and Ociepa, Chris and Phang, Jason and Reynolds, Laria and Schoelkopf, Hailey and Skowron, Aviya and Sutawika, Lintang and Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and Zou, Andy},
    +  title        = {A framework for few-shot language model evaluation},
    +  month        = 07,
    +  year         = 2024,
    +  publisher    = {Zenodo},
    +  version      = {v0.4.3},
    +  doi          = {10.5281/zenodo.12608602},
    +  url          = {https://zenodo.org/records/12608602}
    +}

    Please see reference here

    @@ -1353,17 +1436,17 @@ sparse model before inference for even greater performance benefits.:

  • Compatibility with both FSDP and DeepSpeed
  • See https://github.com/linkedin/Liger-Kernel

    -
    -

    Usage

    -
    plugins:
    -  - axolotl.integrations.liger.LigerPlugin
    -liger_rope: true
    -liger_rms_norm: true
    -liger_glu_activation: true
    -liger_layer_norm: true
    -liger_fused_linear_cross_entropy: true
    -
    -liger_use_token_scaling: true
    +
    +

    Usage

    +
    plugins:
    +  - axolotl.integrations.liger.LigerPlugin
    +liger_rope: true
    +liger_rms_norm: true
    +liger_glu_activation: true
    +liger_layer_norm: true
    +liger_fused_linear_cross_entropy: true
    +
    +liger_use_token_scaling: true

    Supported Models

    @@ -1389,16 +1472,16 @@ sparse model before inference for even greater performance benefits.:

    Citation

    -
    @article{hsu2024ligerkernelefficienttriton,
    -      title={Liger Kernel: Efficient Triton Kernels for LLM Training},
    -      author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
    -      year={2024},
    -      eprint={2410.10989},
    -      archivePrefix={arXiv},
    -      primaryClass={cs.LG},
    -      url={https://arxiv.org/abs/2410.10989},
    -      journal={arXiv preprint arXiv:2410.10989},
    -}
    +
    @article{hsu2024ligerkernelefficienttriton,
    +      title={Liger Kernel: Efficient Triton Kernels for LLM Training},
    +      author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
    +      year={2024},
    +      eprint={2410.10989},
    +      archivePrefix={arXiv},
    +      primaryClass={cs.LG},
    +      url={https://arxiv.org/abs/2410.10989},
    +      journal={arXiv preprint arXiv:2410.10989},
    +}

    Please see reference here

    @@ -1412,25 +1495,25 @@ sparse model before inference for even greater performance benefits.:

    Spectrum is a tool for scanning and evaluating the Signal-to-Noise Ratio (SNR) of layers in large language models. By identifying the top n% of layers with the highest SNR, you can optimize training efficiency.
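
    As a rough illustration of the idea, the sketch below ranks 2-D weight matrices by a crude singular-value SNR proxy and unfreezes only the top fraction; Spectrum’s actual scoring is more principled, so treat this as a mental model only:

    import torch

    def layer_snr(weight: torch.Tensor) -> float:
        # crude SNR proxy over singular values (illustrative, not Spectrum's metric)
        s = torch.linalg.svdvals(weight.float())
        return (s.mean() / s.std()).item()

    def unfreeze_top_fraction(model: torch.nn.Module, top_fraction: float = 0.5) -> None:
        scored = [(name, layer_snr(p)) for name, p in model.named_parameters() if p.ndim == 2]
        scored.sort(key=lambda item: item[1], reverse=True)
        keep = {name for name, _ in scored[: int(len(scored) * top_fraction)]}
        for name, p in model.named_parameters():
            p.requires_grad = name in keep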

    -
    -

    Usage

    -
    plugins:
    -  - axolotl.integrations.spectrum.SpectrumPlugin
    -
    -spectrum_top_fraction: 0.5
    -spectrum_model_name: meta-llama/Meta-Llama-3.1-8B
    +
    +

    Usage

    +
    plugins:
    +  - axolotl.integrations.spectrum.SpectrumPlugin
    +
    +spectrum_top_fraction: 0.5
    +spectrum_model_name: meta-llama/Meta-Llama-3.1-8B

    Citation

    -
    @misc{hartford2024spectrumtargetedtrainingsignal,
    -      title={Spectrum: Targeted Training on Signal to Noise Ratio},
    -      author={Eric Hartford and Lucas Atkins and Fernando Fernandes Neto and David Golchinfar},
    -      year={2024},
    -      eprint={2406.06623},
    -      archivePrefix={arXiv},
    -      primaryClass={cs.LG},
    -      url={https://arxiv.org/abs/2406.06623},
    -}
    +
    @misc{hartford2024spectrumtargetedtrainingsignal,
    +      title={Spectrum: Targeted Training on Signal to Noise Ratio},
    +      author={Eric Hartford and Lucas Atkins and Fernando Fernandes Neto and David Golchinfar},
    +      year={2024},
    +      eprint={2406.06623},
    +      archivePrefix={arXiv},
    +      primaryClass={cs.LG},
    +      url={https://arxiv.org/abs/2406.06623},
    +}

    Please see reference here

    @@ -1454,7 +1537,7 @@ By identifying the top n% of layers with the highest SNR, you can optimize train

    Installation

    -
    pip install swanlab
    +
    pip install swanlab

    Quick Start

    @@ -1466,23 +1549,23 @@ By identifying the top n% of layers with the highest SNR, you can optimize train

    2. Configure Axolotl Config File

    Add SwanLab configuration to your Axolotl YAML config:

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: my-llm-project
    -swanlab_experiment_name: qwen-finetune-v1
    -swanlab_mode: cloud  # Options: cloud, local, offline, disabled
    -swanlab_workspace: my-team  # Optional: organization name
    -swanlab_api_key: YOUR_API_KEY  # Optional: can also use env var SWANLAB_API_KEY
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: my-llm-project
    +swanlab_experiment_name: qwen-finetune-v1
    +swanlab_mode: cloud  # Options: cloud, local, offline, disabled
    +swanlab_workspace: my-team  # Optional: organization name
    +swanlab_api_key: YOUR_API_KEY  # Optional: can also use env var SWANLAB_API_KEY

    3. Run Training

    -
    export SWANLAB_API_KEY=your-api-key-here
    -
    -swanlab login
    -
    -accelerate launch -m axolotl.cli.train your-config.yaml
    +
    export SWANLAB_API_KEY=your-api-key-here
    +
    +swanlab login
    +
    +accelerate launch -m axolotl.cli.train your-config.yaml

    Configuration Options

    @@ -1624,46 +1707,46 @@ By identifying the top n% of layers with the highest SNR, you can optimize train

    Example 1: Basic Cloud Sync

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: llama-finetune
    -swanlab_experiment_name: llama-3-8b-instruct-v1
    -swanlab_mode: cloud
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: llama-finetune
    +swanlab_experiment_name: llama-3-8b-instruct-v1
    +swanlab_mode: cloud

    Example 2: Offline/Local Mode

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: local-experiments
    -swanlab_experiment_name: test-run-1
    -swanlab_mode: local  # or 'offline'
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: local-experiments
    +swanlab_experiment_name: test-run-1
    +swanlab_mode: local  # or 'offline'

    Example 3: Team Workspace

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: research-project
    -swanlab_experiment_name: experiment-42
    -swanlab_workspace: my-research-team
    -swanlab_mode: cloud
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: research-project
    +swanlab_experiment_name: experiment-42
    +swanlab_workspace: my-research-team
    +swanlab_mode: cloud

    Example 4: Private Deployment

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: internal-project
    -swanlab_experiment_name: secure-training
    -swanlab_mode: cloud
    -swanlab_web_host: https://swanlab.yourcompany.com
    -swanlab_api_host: https://api.swanlab.yourcompany.com
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: internal-project
    +swanlab_experiment_name: secure-training
    +swanlab_mode: cloud
    +swanlab_web_host: https://swanlab.yourcompany.com
    +swanlab_api_host: https://api.swanlab.yourcompany.com

    Team Notifications with Lark (Feishu)

    @@ -1684,30 +1767,30 @@ By identifying the top n% of layers with the highest SNR, you can optimize train

    Example 5: Basic Lark Notifications

    Send training notifications to a Lark group chat:

    -
    plugins:
    -  - axolotl.integrations.swanlab.SwanLabPlugin
    -
    -use_swanlab: true
    -swanlab_project: production-training
    -swanlab_experiment_name: llama-3-finetune-v2
    -swanlab_mode: cloud
    -
    -swanlab_lark_webhook_url: https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx
    +
    plugins:
    +  - axolotl.integrations.swanlab.SwanLabPlugin
    +
    +use_swanlab: true
    +swanlab_project: production-training
    +swanlab_experiment_name: llama-3-finetune-v2
    +swanlab_mode: cloud
    +
    +swanlab_lark_webhook_url: https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxx

    Note: This configuration will work, but you’ll see a security warning recommending HMAC secret configuration.
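
    For reference, Lark’s custom-bot signing scheme (which the HMAC secret enables) keys an HMAC-SHA256 on "{timestamp}\n{secret}" over an empty message and base64-encodes the digest; a standalone sketch of that documented scheme, independent of the plugin:

    import base64
    import hashlib
    import hmac
    import time

    def lark_sign(secret: str) -> tuple[str, str]:
        # returns (timestamp, signature) for a Lark/Feishu webhook payload
        ts = str(int(time.time()))
        key = f"{ts}\n{secret}".encode("utf-8")
        digest = hmac.new(key, digestmod=hashlib.sha256).digest()
        return ts, base64.b64encode(digest).decode("utf-8")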