Built site for gh-pages
This commit is contained in:
@@ -510,7 +510,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<section id="axolotl.core.datasets.transforms.chat_builder" class="level1">
|
||||
<h1>core.datasets.transforms.chat_builder</h1>
|
||||
<p><code>core.datasets.transforms.chat_builder</code></p>
|
||||
<p>This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.</p>
|
||||
<p>This module contains a function that builds a transform that takes a row from the
|
||||
dataset and converts it to a Chat.</p>
|
||||
<section id="functions" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="functions">Functions</h2>
|
||||
<table class="caption-top table">
|
||||
@@ -532,19 +533,19 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>core.datasets.transforms.chat_builder.chat_message_transform_builder(</span>
|
||||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a> train_on_inputs<span class="op">=</span><span class="va">False</span>,</span>
|
||||
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a> conversations_field<span class="op">=</span><span class="st">'conversations'</span>,</span>
|
||||
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a> message_field_role<span class="op">=</span>[<span class="st">'role'</span>, <span class="st">'from'</span>],</span>
|
||||
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a> message_field_content<span class="op">=</span>[<span class="st">'value'</span>, <span class="st">'text'</span>, <span class="st">'content'</span>],</span>
|
||||
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> message_field_training<span class="op">=</span>[<span class="st">'train'</span>, <span class="st">'weight'</span>],</span>
|
||||
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a> message_field_role<span class="op">=</span><span class="va">None</span>,</span>
|
||||
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a> message_field_content<span class="op">=</span><span class="va">None</span>,</span>
|
||||
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> message_field_training<span class="op">=</span><span class="va">None</span>,</span>
|
||||
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>Builds a transform that takes a row from the dataset and converts it to a Chat</p>
|
||||
<section id="parameters" class="level4 doc-section doc-section-parameters">
|
||||
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h4>
|
||||
<table class="caption-top table">
|
||||
<colgroup>
|
||||
<col style="width: 12%">
|
||||
<col style="width: 13%">
|
||||
<col style="width: 10%">
|
||||
<col style="width: 65%">
|
||||
<col style="width: 10%">
|
||||
<col style="width: 61%">
|
||||
<col style="width: 16%">
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
@@ -571,19 +572,19 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<td>message_field_role</td>
|
||||
<td>str | list[str]</td>
|
||||
<td>The field name of the role. Defaults to “role”.</td>
|
||||
<td><code>['role', 'from']</code></td>
|
||||
<td><code>None</code></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td>message_field_content</td>
|
||||
<td>str | list[str]</td>
|
||||
<td>The field name of the message content. Defaults to “content”.</td>
|
||||
<td><code>['value', 'text', 'content']</code></td>
|
||||
<td><code>None</code></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td>message_field_training</td>
|
||||
<td>str | list[str]</td>
|
||||
<td>The field name of the train/weight. Defaults to “weight”.</td>
|
||||
<td><code>['train', 'weight']</code></td>
|
||||
<td><code>None</code></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -553,7 +553,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td><a href="../../docs/api/core.datasets.transforms.chat_builder.html#axolotl.core.datasets.transforms.chat_builder">core.datasets.transforms.chat_builder</a></td>
|
||||
<td>This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.</td>
|
||||
<td>This module contains a function that builds a transform that takes a row from the</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
@@ -563,21 +563,23 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
</div>
|
||||
<div id="cell-7" class="cell">
|
||||
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> os</span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Optionally, upload your own JSONL to your Google Drive</span></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>GOOGLE_DRIVE_PATH <span class="op">=</span> <span class="st">""</span> <span class="co"># ex: "MyDrive/Colab\ Notebooks/train.jsonl"</span></span>
|
||||
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="co"># "Select All" permissions, or you may get the error:</span></span>
|
||||
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="co"># "MessageError: Error: credential propagation was unsuccessful"</span></span>
|
||||
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> GOOGLE_DRIVE_PATH:</span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="im">from</span> google.colab <span class="im">import</span> drive</span>
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="co"># Mount your Google Drive</span></span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> GOOGLE_DRIVE_MNT <span class="op">=</span> <span class="st">"/content/drive/"</span></span>
|
||||
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> drive.mount(GOOGLE_DRIVE_MNT, force_remount<span class="op">=</span><span class="va">True</span>)</span>
|
||||
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> tmp_path <span class="op">=</span> os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip(<span class="st">"/"</span>))</span>
|
||||
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> <span class="co"># make sure file exists</span></span>
|
||||
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="kw">not</span> os.path.isfile(tmp_path):</span>
|
||||
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="cf">raise</span> <span class="pp">ValueError</span>(<span class="ss">f"File </span><span class="sc">{</span>tmp_path<span class="sc">}</span><span class="ss"> does not exist"</span>)</span>
|
||||
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> dataset_id <span class="op">=</span> tmp_path</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Optionally, upload your own JSONL to your Google Drive</span></span>
|
||||
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a>GOOGLE_DRIVE_PATH <span class="op">=</span> <span class="st">""</span> <span class="co"># ex: "MyDrive/Colab\ Notebooks/train.jsonl"</span></span>
|
||||
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="co"># "Select All" permissions, or you may get the error:</span></span>
|
||||
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="co"># "MessageError: Error: credential propagation was unsuccessful"</span></span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span> GOOGLE_DRIVE_PATH:</span>
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="im">from</span> google.colab <span class="im">import</span> drive</span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># Mount your Google Drive</span></span>
|
||||
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> GOOGLE_DRIVE_MNT <span class="op">=</span> <span class="st">"/content/drive/"</span></span>
|
||||
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> drive.mount(GOOGLE_DRIVE_MNT, force_remount<span class="op">=</span><span class="va">True</span>)</span>
|
||||
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> tmp_path <span class="op">=</span> os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip(<span class="st">"/"</span>))</span>
|
||||
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="co"># make sure file exists</span></span>
|
||||
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="kw">not</span> os.path.isfile(tmp_path):</span>
|
||||
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> <span class="cf">raise</span> <span class="pp">ValueError</span>(<span class="ss">f"File </span><span class="sc">{</span>tmp_path<span class="sc">}</span><span class="ss"> does not exist"</span>)</span>
|
||||
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> dataset_id <span class="op">=</span> tmp_path</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
@@ -590,61 +592,66 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Axolotl provides full control and transparency over model and training configuration</span></span>
|
||||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a>config <span class="op">=</span> DictDefault(</span>
|
||||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> base_model <span class="op">=</span> <span class="st">"Qwen/Qwen3-14B"</span>, <span class="co"># Use the instruct tuned model, but we're aligning it to be a pirate</span></span>
|
||||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a> load_in_4bit <span class="op">=</span> <span class="va">True</span>, <span class="co"># set to True for qLoRA</span></span>
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> adapter <span class="op">=</span> <span class="st">"qlora"</span>,</span>
|
||||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> lora_r <span class="op">=</span> <span class="dv">32</span>,</span>
|
||||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> lora_alpha <span class="op">=</span> <span class="dv">64</span>,</span>
|
||||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a> lora_target_modules <span class="op">=</span> [</span>
|
||||
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"q_proj"</span>, <span class="st">"k_proj"</span>, <span class="st">"v_proj"</span>, <span class="st">"o_proj"</span>, <span class="co"># train self_attn linear modules</span></span>
|
||||
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"gate_proj"</span>, <span class="st">"down_proj"</span>, <span class="st">"up_proj"</span>, <span class="co"># train MLP linear modules</span></span>
|
||||
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a> lora_qkv_kernel <span class="op">=</span> <span class="va">True</span>, <span class="co"># optimized triton kernels for LoRA</span></span>
|
||||
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a> lora_o_kernel <span class="op">=</span> <span class="va">True</span>,</span>
|
||||
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a> lora_mlp_kernel <span class="op">=</span> <span class="va">True</span>,</span>
|
||||
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a> embeddings_skip_upcast <span class="op">=</span> <span class="va">True</span>, <span class="co"># keep embeddings in fp16 so the model fits in 15GB VRAM</span></span>
|
||||
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a> xformers_attention <span class="op">=</span> <span class="va">True</span>, <span class="co"># use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above</span></span>
|
||||
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a> plugins <span class="op">=</span> [</span>
|
||||
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a> <span class="co"># more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy</span></span>
|
||||
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a> <span class="st">"axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin"</span>,</span>
|
||||
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a> sample_packing <span class="op">=</span> <span class="va">True</span>, <span class="co"># 2-6x increase in tokens per micro-batch</span></span>
|
||||
<span id="cb4-25"><a href="#cb4-25" aria-hidden="true" tabindex="-1"></a> <span class="co"># when using packing, use a slightly higher learning rate to account for fewer steps</span></span>
|
||||
<span id="cb4-26"><a href="#cb4-26" aria-hidden="true" tabindex="-1"></a> <span class="co"># alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch</span></span>
|
||||
<span id="cb4-27"><a href="#cb4-27" aria-hidden="true" tabindex="-1"></a> learning_rate <span class="op">=</span> <span class="fl">0.00019</span>,</span>
|
||||
<span id="cb4-28"><a href="#cb4-28" aria-hidden="true" tabindex="-1"></a> sequence_len <span class="op">=</span> <span class="dv">4096</span>, <span class="co"># larger sequence length improves packing efficiency for more tokens/sec</span></span>
|
||||
<span id="cb4-29"><a href="#cb4-29" aria-hidden="true" tabindex="-1"></a> micro_batch_size <span class="op">=</span> <span class="dv">1</span>,</span>
|
||||
<span id="cb4-30"><a href="#cb4-30" aria-hidden="true" tabindex="-1"></a> gradient_accumulation_steps <span class="op">=</span> <span class="dv">1</span>,</span>
|
||||
<span id="cb4-31"><a href="#cb4-31" aria-hidden="true" tabindex="-1"></a> gradient_checkpointing <span class="op">=</span> <span class="va">True</span>, <span class="co"># tradeoff reduced VRAM for increased time</span></span>
|
||||
<span id="cb4-32"><a href="#cb4-32" aria-hidden="true" tabindex="-1"></a> gradient_checkpointing_kwargs <span class="op">=</span> {</span>
|
||||
<span id="cb4-33"><a href="#cb4-33" aria-hidden="true" tabindex="-1"></a> <span class="st">"use_reentrant"</span>: <span class="va">False</span>,</span>
|
||||
<span id="cb4-34"><a href="#cb4-34" aria-hidden="true" tabindex="-1"></a> },</span>
|
||||
<span id="cb4-35"><a href="#cb4-35" aria-hidden="true" tabindex="-1"></a> optimizer <span class="op">=</span> <span class="st">"paged_adamw_8bit"</span>,</span>
|
||||
<span id="cb4-36"><a href="#cb4-36" aria-hidden="true" tabindex="-1"></a> lr_scheduler <span class="op">=</span> <span class="st">"cosine"</span>,</span>
|
||||
<span id="cb4-37"><a href="#cb4-37" aria-hidden="true" tabindex="-1"></a> warmup_steps <span class="op">=</span> <span class="dv">5</span>,</span>
|
||||
<span id="cb4-38"><a href="#cb4-38" aria-hidden="true" tabindex="-1"></a> fp16 <span class="op">=</span> <span class="va">True</span>, <span class="co"># use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4</span></span>
|
||||
<span id="cb4-39"><a href="#cb4-39" aria-hidden="true" tabindex="-1"></a> bf16 <span class="op">=</span> <span class="va">False</span>,</span>
|
||||
<span id="cb4-40"><a href="#cb4-40" aria-hidden="true" tabindex="-1"></a> max_grad_norm <span class="op">=</span> <span class="fl">0.1</span>, <span class="co"># gradient clipping</span></span>
|
||||
<span id="cb4-41"><a href="#cb4-41" aria-hidden="true" tabindex="-1"></a> num_epochs <span class="op">=</span> <span class="dv">1</span>,</span>
|
||||
<span id="cb4-42"><a href="#cb4-42" aria-hidden="true" tabindex="-1"></a> saves_per_epoch <span class="op">=</span> <span class="dv">2</span>, <span class="co"># how many checkpoints to save over one epoch</span></span>
|
||||
<span id="cb4-43"><a href="#cb4-43" aria-hidden="true" tabindex="-1"></a> logging_steps <span class="op">=</span> <span class="dv">1</span>,</span>
|
||||
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a> output_dir <span class="op">=</span> <span class="st">"./outputs/qwen-sft-pirate-rrr"</span>,</span>
|
||||
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a> chat_template <span class="op">=</span> <span class="st">"qwen3"</span>,</span>
|
||||
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a> datasets <span class="op">=</span> [</span>
|
||||
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> {</span>
|
||||
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: dataset_id, <span class="co"># Huggingface Dataset id or path to train.jsonl</span></span>
|
||||
<span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a> <span class="st">"type"</span>: <span class="st">"chat_template"</span>,</span>
|
||||
<span id="cb4-50"><a href="#cb4-50" aria-hidden="true" tabindex="-1"></a> <span class="st">"split"</span>: <span class="st">"train"</span>,</span>
|
||||
<span id="cb4-51"><a href="#cb4-51" aria-hidden="true" tabindex="-1"></a> <span class="st">"eot_tokens"</span>: [<span class="st">"<|im_end|>"</span>],</span>
|
||||
<span id="cb4-52"><a href="#cb4-52" aria-hidden="true" tabindex="-1"></a> }</span>
|
||||
<span id="cb4-53"><a href="#cb4-53" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-54"><a href="#cb4-54" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor <span class="op">=</span> <span class="dv">8</span>, <span class="co"># dataloader optimizations</span></span>
|
||||
<span id="cb4-55"><a href="#cb4-55" aria-hidden="true" tabindex="-1"></a> dataloader_num_workers <span class="op">=</span> <span class="dv">2</span>,</span>
|
||||
<span id="cb4-56"><a href="#cb4-56" aria-hidden="true" tabindex="-1"></a> dataloader_pin_memory <span class="op">=</span> <span class="va">True</span>,</span>
|
||||
<span id="cb4-57"><a href="#cb4-57" aria-hidden="true" tabindex="-1"></a> )</span>
|
||||
<span id="cb4-58"><a href="#cb4-58" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-59"><a href="#cb4-59" aria-hidden="true" tabindex="-1"></a><span class="co"># validates the configuration</span></span>
|
||||
<span id="cb4-60"><a href="#cb4-60" aria-hidden="true" tabindex="-1"></a>cfg <span class="op">=</span> load_cfg(config)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> base_model<span class="op">=</span><span class="st">"Qwen/Qwen3-14B"</span>, <span class="co"># Use the instruct tuned model, but we're aligning it to be a pirate</span></span>
|
||||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a> load_in_4bit<span class="op">=</span><span class="va">True</span>, <span class="co"># set to True for qLoRA</span></span>
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> adapter<span class="op">=</span><span class="st">"qlora"</span>,</span>
|
||||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> lora_r<span class="op">=</span><span class="dv">32</span>,</span>
|
||||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> lora_alpha<span class="op">=</span><span class="dv">64</span>,</span>
|
||||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a> lora_target_modules<span class="op">=</span>[</span>
|
||||
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"q_proj"</span>,</span>
|
||||
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> <span class="st">"k_proj"</span>,</span>
|
||||
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> <span class="st">"v_proj"</span>,</span>
|
||||
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a> <span class="st">"o_proj"</span>, <span class="co"># train self_attn linear modules</span></span>
|
||||
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a> <span class="st">"gate_proj"</span>,</span>
|
||||
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a> <span class="st">"down_proj"</span>,</span>
|
||||
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a> <span class="st">"up_proj"</span>, <span class="co"># train MLP linear modules</span></span>
|
||||
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a> lora_qkv_kernel<span class="op">=</span><span class="va">True</span>, <span class="co"># optimized triton kernels for LoRA</span></span>
|
||||
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a> lora_o_kernel<span class="op">=</span><span class="va">True</span>,</span>
|
||||
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a> lora_mlp_kernel<span class="op">=</span><span class="va">True</span>,</span>
|
||||
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a> embeddings_skip_upcast<span class="op">=</span><span class="va">True</span>, <span class="co"># keep embeddings in fp16 so the model fits in 15GB VRAM</span></span>
|
||||
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a> xformers_attention<span class="op">=</span><span class="va">True</span>, <span class="co"># use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above</span></span>
|
||||
<span id="cb4-25"><a href="#cb4-25" aria-hidden="true" tabindex="-1"></a> plugins<span class="op">=</span>[</span>
|
||||
<span id="cb4-26"><a href="#cb4-26" aria-hidden="true" tabindex="-1"></a> <span class="co"># more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy</span></span>
|
||||
<span id="cb4-27"><a href="#cb4-27" aria-hidden="true" tabindex="-1"></a> <span class="st">"axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin"</span>,</span>
|
||||
<span id="cb4-28"><a href="#cb4-28" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-29"><a href="#cb4-29" aria-hidden="true" tabindex="-1"></a> sample_packing<span class="op">=</span><span class="va">True</span>, <span class="co"># 2-6x increase in tokens per micro-batch</span></span>
|
||||
<span id="cb4-30"><a href="#cb4-30" aria-hidden="true" tabindex="-1"></a> <span class="co"># when using packing, use a slightly higher learning rate to account for fewer steps</span></span>
|
||||
<span id="cb4-31"><a href="#cb4-31" aria-hidden="true" tabindex="-1"></a> <span class="co"># alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch</span></span>
|
||||
<span id="cb4-32"><a href="#cb4-32" aria-hidden="true" tabindex="-1"></a> learning_rate<span class="op">=</span><span class="fl">0.00019</span>,</span>
|
||||
<span id="cb4-33"><a href="#cb4-33" aria-hidden="true" tabindex="-1"></a> sequence_len<span class="op">=</span><span class="dv">4096</span>, <span class="co"># larger sequence length improves packing efficiency for more tokens/sec</span></span>
|
||||
<span id="cb4-34"><a href="#cb4-34" aria-hidden="true" tabindex="-1"></a> micro_batch_size<span class="op">=</span><span class="dv">1</span>,</span>
|
||||
<span id="cb4-35"><a href="#cb4-35" aria-hidden="true" tabindex="-1"></a> gradient_accumulation_steps<span class="op">=</span><span class="dv">1</span>,</span>
|
||||
<span id="cb4-36"><a href="#cb4-36" aria-hidden="true" tabindex="-1"></a> gradient_checkpointing<span class="op">=</span><span class="va">True</span>, <span class="co"># tradeoff reduced VRAM for increased time</span></span>
|
||||
<span id="cb4-37"><a href="#cb4-37" aria-hidden="true" tabindex="-1"></a> gradient_checkpointing_kwargs<span class="op">=</span>{</span>
|
||||
<span id="cb4-38"><a href="#cb4-38" aria-hidden="true" tabindex="-1"></a> <span class="st">"use_reentrant"</span>: <span class="va">False</span>,</span>
|
||||
<span id="cb4-39"><a href="#cb4-39" aria-hidden="true" tabindex="-1"></a> },</span>
|
||||
<span id="cb4-40"><a href="#cb4-40" aria-hidden="true" tabindex="-1"></a> optimizer<span class="op">=</span><span class="st">"paged_adamw_8bit"</span>,</span>
|
||||
<span id="cb4-41"><a href="#cb4-41" aria-hidden="true" tabindex="-1"></a> lr_scheduler<span class="op">=</span><span class="st">"cosine"</span>,</span>
|
||||
<span id="cb4-42"><a href="#cb4-42" aria-hidden="true" tabindex="-1"></a> warmup_steps<span class="op">=</span><span class="dv">5</span>,</span>
|
||||
<span id="cb4-43"><a href="#cb4-43" aria-hidden="true" tabindex="-1"></a> fp16<span class="op">=</span><span class="va">True</span>, <span class="co"># use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4</span></span>
|
||||
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a> bf16<span class="op">=</span><span class="va">False</span>,</span>
|
||||
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a> max_grad_norm<span class="op">=</span><span class="fl">0.1</span>, <span class="co"># gradient clipping</span></span>
|
||||
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a> num_epochs<span class="op">=</span><span class="dv">1</span>,</span>
|
||||
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a> saves_per_epoch<span class="op">=</span><span class="dv">2</span>, <span class="co"># how many checkpoints to save over one epoch</span></span>
|
||||
<span id="cb4-48"><a href="#cb4-48" aria-hidden="true" tabindex="-1"></a> logging_steps<span class="op">=</span><span class="dv">1</span>,</span>
|
||||
<span id="cb4-49"><a href="#cb4-49" aria-hidden="true" tabindex="-1"></a> output_dir<span class="op">=</span><span class="st">"./outputs/qwen-sft-pirate-rrr"</span>,</span>
|
||||
<span id="cb4-50"><a href="#cb4-50" aria-hidden="true" tabindex="-1"></a> chat_template<span class="op">=</span><span class="st">"qwen3"</span>,</span>
|
||||
<span id="cb4-51"><a href="#cb4-51" aria-hidden="true" tabindex="-1"></a> datasets<span class="op">=</span>[</span>
|
||||
<span id="cb4-52"><a href="#cb4-52" aria-hidden="true" tabindex="-1"></a> {</span>
|
||||
<span id="cb4-53"><a href="#cb4-53" aria-hidden="true" tabindex="-1"></a> <span class="st">"path"</span>: dataset_id, <span class="co"># Huggingface Dataset id or path to train.jsonl</span></span>
|
||||
<span id="cb4-54"><a href="#cb4-54" aria-hidden="true" tabindex="-1"></a> <span class="st">"type"</span>: <span class="st">"chat_template"</span>,</span>
|
||||
<span id="cb4-55"><a href="#cb4-55" aria-hidden="true" tabindex="-1"></a> <span class="st">"split"</span>: <span class="st">"train"</span>,</span>
|
||||
<span id="cb4-56"><a href="#cb4-56" aria-hidden="true" tabindex="-1"></a> <span class="st">"eot_tokens"</span>: [<span class="st">"<|im_end|>"</span>],</span>
|
||||
<span id="cb4-57"><a href="#cb4-57" aria-hidden="true" tabindex="-1"></a> }</span>
|
||||
<span id="cb4-58"><a href="#cb4-58" aria-hidden="true" tabindex="-1"></a> ],</span>
|
||||
<span id="cb4-59"><a href="#cb4-59" aria-hidden="true" tabindex="-1"></a> dataloader_prefetch_factor<span class="op">=</span><span class="dv">8</span>, <span class="co"># dataloader optimizations</span></span>
|
||||
<span id="cb4-60"><a href="#cb4-60" aria-hidden="true" tabindex="-1"></a> dataloader_num_workers<span class="op">=</span><span class="dv">2</span>,</span>
|
||||
<span id="cb4-61"><a href="#cb4-61" aria-hidden="true" tabindex="-1"></a> dataloader_pin_memory<span class="op">=</span><span class="va">True</span>,</span>
|
||||
<span id="cb4-62"><a href="#cb4-62" aria-hidden="true" tabindex="-1"></a>)</span>
|
||||
<span id="cb4-63"><a href="#cb4-63" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb4-64"><a href="#cb4-64" aria-hidden="true" tabindex="-1"></a><span class="co"># validates the configuration</span></span>
|
||||
<span id="cb4-65"><a href="#cb4-65" aria-hidden="true" tabindex="-1"></a>cfg <span class="op">=</span> load_cfg(config)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output cell-output-stdout">
|
||||
<div class="ansi-escaped-output">
|
||||
<pre>[2025-05-08 13:40:27,488] [INFO] [root.register:348] [PID:174] Attempting to load plugin: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
|
||||
@@ -673,8 +680,9 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
</div>
|
||||
<div id="cell-10" class="cell">
|
||||
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> axolotl.utils <span class="im">import</span> patch_optimized_env</span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="co"># speedup downloads from HF 🤗 and set "PYTORCH_CUDA_ALLOC_CONF" env to save memory</span></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>patch_optimized_env()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="co"># speedup downloads from HF 🤗 and set "PYTORCH_CUDA_ALLOC_CONF" env to save memory</span></span>
|
||||
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a>patch_optimized_env()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="datasets" class="level1">
|
||||
@@ -1235,29 +1243,30 @@ You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokeni
|
||||
<section id="inferencing-the-trained-model" class="level1">
|
||||
<h1>Inferencing the trained model</h1>
|
||||
<div id="cell-16" class="cell" data-quarto-private-1="{"key":"colab","value":{"base_uri":"https://localhost:8080/"}}" data-outputid="e5050605-f6c9-421c-98f9-bde56a281eae">
|
||||
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> torch</span>
|
||||
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> transformers <span class="im">import</span> TextStreamer</span>
|
||||
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a>messages <span class="op">=</span> [</span>
|
||||
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> {</span>
|
||||
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a> <span class="st">"role"</span>: <span class="st">"user"</span>,</span>
|
||||
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> <span class="st">"content"</span>: <span class="st">"Explain the Pythagorean theorem to me."</span>,</span>
|
||||
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a> },</span>
|
||||
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a>]</span>
|
||||
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a>prompt <span class="op">=</span> tokenizer.apply_chat_template(</span>
|
||||
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a> messages,</span>
|
||||
<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a> add_generation_prompt<span class="op">=</span><span class="va">True</span>,</span>
|
||||
<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a> tokenize<span class="op">=</span><span class="va">False</span>,</span>
|
||||
<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a> enable_thinking <span class="op">=</span> <span class="va">False</span>,</span>
|
||||
<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a>)</span>
|
||||
<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-18"><a href="#cb11-18" aria-hidden="true" tabindex="-1"></a>outputs <span class="op">=</span> model.generate(</span>
|
||||
<span id="cb11-19"><a href="#cb11-19" aria-hidden="true" tabindex="-1"></a> <span class="op">**</span>tokenizer(prompt, return_tensors <span class="op">=</span> <span class="st">"pt"</span>).to(<span class="st">"cuda"</span>),</span>
|
||||
<span id="cb11-20"><a href="#cb11-20" aria-hidden="true" tabindex="-1"></a> max_new_tokens <span class="op">=</span> <span class="dv">192</span>,</span>
|
||||
<span id="cb11-21"><a href="#cb11-21" aria-hidden="true" tabindex="-1"></a> temperature <span class="op">=</span> <span class="fl">1.0</span>, top_p <span class="op">=</span> <span class="fl">0.8</span>, top_k <span class="op">=</span> <span class="dv">32</span>,</span>
|
||||
<span id="cb11-22"><a href="#cb11-22" aria-hidden="true" tabindex="-1"></a> streamer <span class="op">=</span> TextStreamer(tokenizer, skip_prompt <span class="op">=</span> <span class="va">True</span>),</span>
|
||||
<span id="cb11-23"><a href="#cb11-23" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> transformers <span class="im">import</span> TextStreamer</span>
|
||||
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>messages <span class="op">=</span> [</span>
|
||||
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> {</span>
|
||||
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"role"</span>: <span class="st">"user"</span>,</span>
|
||||
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a> <span class="st">"content"</span>: <span class="st">"Explain the Pythagorean theorem to me."</span>,</span>
|
||||
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> },</span>
|
||||
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a>]</span>
|
||||
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-10"><a href="#cb11-10" aria-hidden="true" tabindex="-1"></a>prompt <span class="op">=</span> tokenizer.apply_chat_template(</span>
|
||||
<span id="cb11-11"><a href="#cb11-11" aria-hidden="true" tabindex="-1"></a> messages,</span>
|
||||
<span id="cb11-12"><a href="#cb11-12" aria-hidden="true" tabindex="-1"></a> add_generation_prompt<span class="op">=</span><span class="va">True</span>,</span>
|
||||
<span id="cb11-13"><a href="#cb11-13" aria-hidden="true" tabindex="-1"></a> tokenize<span class="op">=</span><span class="va">False</span>,</span>
|
||||
<span id="cb11-14"><a href="#cb11-14" aria-hidden="true" tabindex="-1"></a> enable_thinking<span class="op">=</span><span class="va">False</span>,</span>
|
||||
<span id="cb11-15"><a href="#cb11-15" aria-hidden="true" tabindex="-1"></a>)</span>
|
||||
<span id="cb11-16"><a href="#cb11-16" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb11-17"><a href="#cb11-17" aria-hidden="true" tabindex="-1"></a>outputs <span class="op">=</span> model.generate(</span>
|
||||
<span id="cb11-18"><a href="#cb11-18" aria-hidden="true" tabindex="-1"></a> <span class="op">**</span>tokenizer(prompt, return_tensors<span class="op">=</span><span class="st">"pt"</span>).to(<span class="st">"cuda"</span>),</span>
|
||||
<span id="cb11-19"><a href="#cb11-19" aria-hidden="true" tabindex="-1"></a> max_new_tokens<span class="op">=</span><span class="dv">192</span>,</span>
|
||||
<span id="cb11-20"><a href="#cb11-20" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">1.0</span>,</span>
|
||||
<span id="cb11-21"><a href="#cb11-21" aria-hidden="true" tabindex="-1"></a> top_p<span class="op">=</span><span class="fl">0.8</span>,</span>
|
||||
<span id="cb11-22"><a href="#cb11-22" aria-hidden="true" tabindex="-1"></a> top_k<span class="op">=</span><span class="dv">32</span>,</span>
|
||||
<span id="cb11-23"><a href="#cb11-23" aria-hidden="true" tabindex="-1"></a> streamer<span class="op">=</span>TextStreamer(tokenizer, skip_prompt<span class="op">=</span><span class="va">True</span>),</span>
|
||||
<span id="cb11-24"><a href="#cb11-24" aria-hidden="true" tabindex="-1"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output cell-output-stdout">
|
||||
<pre><code>Ahoy there, matey! Shiver me timbers, ye be lookin' for the Pythagorean theorem, eh? Well, hold yer horses and listen up, for I'll be tellin' ye all about it in me own special way.
|
||||
|
||||
@@ -1295,14 +1304,15 @@ drwxr-xr-x 2 root root 4.0K May 7 22:21 checkpoint-25
|
||||
<p>If you prefer to manually upload the training artifacts, we can still upload the entire final checkpoint to HuggingFace from the CLI.</p>
|
||||
<div id="cell-20" class="cell" data-quarto-private-1="{"key":"colab","value":{"base_uri":"https://localhost:8080/","height":955,"referenced_widgets":["c12ea43372ac4d57bb9605f1a429b397","86816687746246b4a6105e8010384e25","6f05e9bebf7b40c9835808e77de6c236","c7433acd3c4841e6958ae8f7e87b1808","19c1e38389fa46c7b7e2152a56e1df34","0e067d8db8ed48308a718d5f57683fd1","131065f118274a1586ac38e39ed84ef0","8640ac440fbc4644b9a3af7ba3ae7183","5cea7996f02040b187ece0bb2d6a8d1f","2e257c8be2da40b4bb67a9e4ab6811f3","56e3768bef5a4b9db4168c5c17f509c2","62c028fdef904dedb9cdeca2b3bda725","a7cf477e80fc43e0ad82c7997b076dce","835bcc28a5564fb9b3d651bc8e32dc46","9f1c9a0695384bdaa6f8b847ef89bee8","b1bea589efa14258a9982071b87938bf","590eef89881545aa8bbef9a8bbe7fb00","4b1f04ff63d14a118fdd15814dff50e4","39789237703c4a418134243055c9cbf5","a3a945817f684328b34651fe052393ec"]}}" data-outputid="6e489ab2-4abe-4e28-84ca-959f912433a4">
|
||||
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> huggingface_hub <span class="im">import</span> notebook_login</span>
|
||||
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="co"># remove the partial epoch checkpoints</span></span>
|
||||
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>rm <span class="op">-</span>rf <span class="st">"./outputs/qwen-sft-pirate-rrr/checkpoint-*"</span></span>
|
||||
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="co"># HF Notebook login widget</span></span>
|
||||
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a>notebook_login()</span>
|
||||
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a><span class="co"># upload the LoRA adapter for your model to HF, remember to update the username/model-name below</span></span>
|
||||
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>huggingface<span class="op">-</span>cli upload <span class="op">--</span>repo<span class="op">-</span><span class="bu">type</span><span class="op">=</span>model winglian<span class="op">/</span>pirate<span class="op">-</span>qwen<span class="op">-</span><span class="dv">14</span><span class="er">B</span> <span class="st">"./outputs/qwen-sft-pirate-rrr"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="co"># remove the partial epoch checkpoints</span></span>
|
||||
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>rm <span class="op">-</span>rf <span class="st">"./outputs/qwen-sft-pirate-rrr/checkpoint-*"</span></span>
|
||||
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="co"># HF Notebook login widget</span></span>
|
||||
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a>notebook_login()</span>
|
||||
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a><span class="co"># upload the LoRA adapter for your model to HF, remember to update the username/model-name below</span></span>
|
||||
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="op">!</span>huggingface<span class="op">-</span>cli upload <span class="op">--</span>repo<span class="op">-</span><span class="bu">type</span><span class="op">=</span>model winglian<span class="op">/</span>pirate<span class="op">-</span>qwen<span class="op">-</span><span class="dv">14</span><span class="er">B</span> <span class="st">"./outputs/qwen-sft-pirate-rrr"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="cell-output cell-output-display">
|
||||
<script type="application/vnd.jupyter.widget-view+json">
|
||||
{"model_id":"c12ea43372ac4d57bb9605f1a429b397","version_major":2,"version_minor":0,"quarto_mimetype":"application/vnd.jupyter.widget-view+json"}
|
||||
|
||||
10
search.json
10
search.json
File diff suppressed because one or more lines are too long
394
sitemap.xml
394
sitemap.xml
@@ -2,790 +2,790 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/index.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.174Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.321Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.178Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.326Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/gradient_checkpointing.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.303Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/mixed_precision.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/sequence_parallelism.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.158Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/docker.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/torchao.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.158Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multi-gpu.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset_preprocessing.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/debugging.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/rlhf.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.158Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/lr_groups.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multimodal.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/ray-integration.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/input_output.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/inference.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/fsdp_qlora.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multipack.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.192Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.421Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.459Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.688Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.art.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.820Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.053Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.quantize.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.900Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.130Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.410Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.639Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html</loc>
|
||||
<lastmod>2025-08-22T18:32:34.002Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.236Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.196Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.426Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.885Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.119Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.data.sft.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.632Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.863Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.477Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.705Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.geglu.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.385Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.614Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.283Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.512Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.946Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.176Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.851Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.082Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.214Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.443Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/evaluate.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.602Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.836Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.data.pretraining.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.625Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.856Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/index.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.534Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.766Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.465Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.694Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.456Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.685Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.checks.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.827Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.060Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.chat_templates.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.524Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.754Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.rl.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.692Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.926Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.218Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.447Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.062Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.293Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.207Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.436Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.073Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.303Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.fetch.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.935Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.165Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.699Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.930Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.base.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.910Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.140Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.929Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.160Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html</loc>
|
||||
<lastmod>2025-08-22T18:32:34.011Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.245Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html</loc>
|
||||
<lastmod>2025-08-22T18:32:34.007Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.241Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.causal.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.688Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.922Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.train.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.788Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.022Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.728Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.961Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.901Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.134Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.evaluate.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.797Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.030Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.trainer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.563Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.793Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.252Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.482Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/convert.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.626Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.860Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.716Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.948Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.patch_manager.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.055Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.286Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.training.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.681Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.912Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.config.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.667Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.898Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.262Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.491Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.287Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.516Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.vllm_serve.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.907Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.137Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.159Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.388Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.merge_lora.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.874Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.105Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.096Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.327Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.147Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.376Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.bench.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.538Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.768Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.datasets.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.922Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.156Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.train.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.958Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.188Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.411Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.641Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.messages.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.728Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.962Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.219Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.449Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.trl.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.984Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.214Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.preprocess.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.895Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.125Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.swiglu.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.395Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.625Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.quantize.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.403Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.632Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.132Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.361Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.261Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.490Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.007Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.237Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.413Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.642Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.datasets.chat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.737Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.972Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.817Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.050Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.main.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.780Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.014Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.996Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.226Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.trl.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.711Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.943Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.244Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.474Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_tokenizers.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.668Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.901Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.146Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.375Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/logging_config.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.677Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.911Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/tokenized.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/index.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/pretraining.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/qat.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.162Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.310Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/FAQS.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.152Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.300Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/installation.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/template_free.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/conversation.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.dict.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.616Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.847Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.186Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.415Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.core.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.925Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.158Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.inference.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.865Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.097Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.freeze.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.546Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.775Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.019Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.249Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.066Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.296Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.918Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.148Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.shared.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.732Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.967Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html</loc>
|
||||
<lastmod>2025-08-22T18:32:34.004Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.238Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.952Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.186Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.021Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.251Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.624Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.854Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.base.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.882Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.116Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.243Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.472Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.458Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.687Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.quantization.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.653Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.884Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:34.017Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.252Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.base.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.683Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.917Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.490Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.719Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.894Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.128Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.liger.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.897Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.131Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mamba.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.947Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.181Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.model.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.030Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.261Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schedulers.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.591Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.821Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.lora.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.374Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.604Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.535Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.764Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.731Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.965Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mamba.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.989Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.220Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.enums.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.739Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.971Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.487Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.716Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.404Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.634Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.training_args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.705Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.939Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.999Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.233Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.916Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.146Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.load.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.940Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.171Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/train.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.592Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.825Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.887Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.120Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.992Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.226Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.203Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.432Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.419Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.649Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.486Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.lora.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.529Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.759Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.tokenizer.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.039Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.269Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.729Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.964Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.batching.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.944Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.177Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.886Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.117Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.180Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.409Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.tokenization.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.523Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.752Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.architectures.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.905Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.139Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.745Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.980Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.base.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.969Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.199Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.448Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.677Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.744Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.977Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.230Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.459Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.config.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.846Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.078Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.peft.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.707Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.939Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.167Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.397Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.241Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.471Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.414Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.644Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.base.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.098Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.328Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.923Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.157Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.relora.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.418Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.647Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.const.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.907Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.141Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.469Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.697Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.distributed.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.611Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.841Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.constants.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.057Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.288Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.516Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.745Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/datasets.html</loc>
|
||||
<lastmod>2025-08-22T18:32:32.613Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:54.846Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.475Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.704Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.processor.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.040Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.271Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.904Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:56.138Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.adapter.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.046Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.276Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.model.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.674Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.905Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html</loc>
|
||||
<lastmod>2025-08-22T18:32:33.240Z</lastmod>
|
||||
<lastmod>2025-08-24T03:40:55.469Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/batch_vs_grad.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/mac.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/nd_parallelism.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset_loading.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/lora_optims.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.305Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/unsloth.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.158Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/config-reference.html</loc>
|
||||
<lastmod>2025-08-22T18:32:49.071Z</lastmod>
|
||||
<lastmod>2025-08-24T03:41:09.823Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/custom_integrations.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/faq.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/amd_hpc.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multi-node.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/cli.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.153Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.302Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/nccl.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/optimizers.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/getting-started.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.154Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.303Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/quantize.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/reward_modelling.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.157Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
|
||||
<lastmod>2025-08-22T18:29:19.178Z</lastmod>
|
||||
<lastmod>2025-08-24T03:37:40.326Z</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
Reference in New Issue
Block a user