Built site for gh-pages
This commit is contained in:
@@ -20,6 +20,41 @@ ul.task-list li input[type="checkbox"] {
|
||||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
|
||||
vertical-align: middle;
|
||||
}
|
||||
/* CSS for syntax highlighting */
|
||||
html { -webkit-text-size-adjust: 100%; }
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
@@ -474,7 +509,13 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<h2 id="toc-title">On this page</h2>
|
||||
|
||||
<ul>
|
||||
<li><a href="#axolotl.prompt_strategies.dpo.chat_template" id="toc-axolotl.prompt_strategies.dpo.chat_template" class="nav-link active" data-scroll-target="#axolotl.prompt_strategies.dpo.chat_template">prompt_strategies.dpo.chat_template</a></li>
|
||||
<li><a href="#axolotl.prompt_strategies.dpo.chat_template" id="toc-axolotl.prompt_strategies.dpo.chat_template" class="nav-link active" data-scroll-target="#axolotl.prompt_strategies.dpo.chat_template">prompt_strategies.dpo.chat_template</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#functions" id="toc-functions" class="nav-link" data-scroll-target="#functions">Functions</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#axolotl.prompt_strategies.dpo.chat_template.argilla_chat" id="toc-axolotl.prompt_strategies.dpo.chat_template.argilla_chat" class="nav-link" data-scroll-target="#axolotl.prompt_strategies.dpo.chat_template.argilla_chat">argilla_chat</a></li>
|
||||
</ul></li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
@@ -488,8 +529,110 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<h1>prompt_strategies.dpo.chat_template</h1>
|
||||
<p><code>prompt_strategies.dpo.chat_template</code></p>
|
||||
<p>DPO prompt strategies for using tokenizer chat templates.</p>
|
||||
<section id="functions" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="functions">Functions</h2>
|
||||
<table class="caption-top table">
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>Name</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td><a href="#axolotl.prompt_strategies.dpo.chat_template.argilla_chat">argilla_chat</a></td>
|
||||
<td>DPO chat template strategy for argilla-style datasets.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<section id="axolotl.prompt_strategies.dpo.chat_template.argilla_chat" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="axolotl.prompt_strategies.dpo.chat_template.argilla_chat">argilla_chat</h3>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>prompt_strategies.dpo.chat_template.argilla_chat(cfg, dataset_idx<span class="op">=</span><span class="dv">0</span>, <span class="op">**</span>kwargs)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>DPO chat template strategy for argilla-style datasets.</p>
|
||||
<p>For argilla-style datasets where chosen/rejected contain full conversations
|
||||
instead of single response messages. Extracts the conversation history from
|
||||
the chosen field and formats both chosen/rejected responses using the
|
||||
configured chat template.</p>
|
||||
<section id="parameters" class="level4 doc-section doc-section-parameters">
|
||||
<h4 class="doc-section doc-section-parameters anchored" data-anchor-id="parameters">Parameters</h4>
|
||||
<table class="caption-top table">
|
||||
<colgroup>
|
||||
<col style="width: 12%">
|
||||
<col style="width: 7%">
|
||||
<col style="width: 67%">
|
||||
<col style="width: 11%">
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th>Description</th>
|
||||
<th>Default</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td>cfg</td>
|
||||
<td></td>
|
||||
<td>Configuration object containing chat_template and dataset settings</td>
|
||||
<td><em>required</em></td>
|
||||
</tr>
|
||||
<tr class="even">
|
||||
<td>dataset_idx</td>
|
||||
<td></td>
|
||||
<td>Index of the dataset in the config (default: 0)</td>
|
||||
<td><code>0</code></td>
|
||||
</tr>
|
||||
<tr class="odd">
|
||||
<td>**kwargs</td>
|
||||
<td></td>
|
||||
<td>Additional keyword arguments (unused)</td>
|
||||
<td><code>{}</code></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
<section id="returns" class="level4 doc-section doc-section-returns">
|
||||
<h4 class="doc-section doc-section-returns anchored" data-anchor-id="returns">Returns</h4>
|
||||
<table class="caption-top table">
|
||||
<colgroup>
|
||||
<col style="width: 4%">
|
||||
<col style="width: 4%">
|
||||
<col style="width: 91%">
|
||||
</colgroup>
|
||||
<thead>
|
||||
<tr class="header">
|
||||
<th>Name</th>
|
||||
<th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr class="odd">
|
||||
<td>tuple</td>
|
||||
<td></td>
|
||||
<td>(transform_fn, dataset_kwargs) where: - transform_fn: Function to transform dataset samples - dataset_kwargs: Dict with ‘remove_columns’ specifying columns to drop</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</section>
|
||||
<section id="dataset-format" class="level4 doc-section doc-section-dataset-format">
|
||||
<h4 class="doc-section doc-section-dataset-format anchored" data-anchor-id="dataset-format">Dataset format</h4>
|
||||
<p>{
|
||||
“chosen”: [
|
||||
{“role”: “user”, “content”: “…”},
|
||||
{“role”: “assistant”, “content”: “…”}
|
||||
],
|
||||
“rejected”: [
|
||||
{“role”: “user”, “content”: “…”},
|
||||
{“role”: “assistant”, “content”: “…”}
|
||||
]
|
||||
}</p>
|
||||
|
||||
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
</main> <!-- /main -->
|
||||
|
||||
442
docs/rlhf.html
442
docs/rlhf.html
@@ -528,6 +528,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<li><a href="#llama3.prompt_pairs" id="toc-llama3.prompt_pairs" class="nav-link" data-scroll-target="#llama3.prompt_pairs">llama3.prompt_pairs</a></li>
|
||||
<li><a href="#llama3.ultra" id="toc-llama3.ultra" class="nav-link" data-scroll-target="#llama3.ultra">llama3.ultra</a></li>
|
||||
<li><a href="#zephyr.nectar" id="toc-zephyr.nectar" class="nav-link" data-scroll-target="#zephyr.nectar">zephyr.nectar</a></li>
|
||||
<li><a href="#chat_template.argilla_chat" id="toc-chat_template.argilla_chat" class="nav-link" data-scroll-target="#chat_template.argilla_chat">chat_template.argilla_chat</a></li>
|
||||
<li><a href="#chat_template.default" id="toc-chat_template.default" class="nav-link" data-scroll-target="#chat_template.default">chat_template.default</a></li>
|
||||
<li><a href="#user_defined.default" id="toc-user_defined.default" class="nav-link" data-scroll-target="#user_defined.default">user_defined.default</a></li>
|
||||
</ul></li>
|
||||
@@ -787,228 +788,241 @@ Tip
|
||||
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chat_template.argilla_chat" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chat_template.argilla_chat">chat_template.argilla_chat</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb15"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chat_template.default" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chat_template.default">chat_template.default</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb15"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.default</span></span>
|
||||
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_messages</span><span class="kw">:</span><span class="at"> </span><span class="st">"messages"</span></span>
|
||||
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
|
||||
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
|
||||
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">message_property_mappings</span><span class="kw">:</span></span>
|
||||
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">role</span><span class="kw">:</span><span class="at"> role</span></span>
|
||||
<span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">content</span><span class="kw">:</span><span class="at"> content</span></span>
|
||||
<span id="cb15-12"><a href="#cb15-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">roles</span><span class="kw">:</span></span>
|
||||
<span id="cb15-13"><a href="#cb15-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">user</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"user"</span><span class="kw">]</span></span>
|
||||
<span id="cb15-14"><a href="#cb15-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">assistant</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"assistant"</span><span class="kw">]</span></span>
|
||||
<span id="cb15-15"><a href="#cb15-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">system</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"system"</span><span class="kw">]</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb16"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.default</span></span>
|
||||
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_messages</span><span class="kw">:</span><span class="at"> </span><span class="st">"messages"</span></span>
|
||||
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
|
||||
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
|
||||
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">message_property_mappings</span><span class="kw">:</span></span>
|
||||
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">role</span><span class="kw">:</span><span class="at"> role</span></span>
|
||||
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">content</span><span class="kw">:</span><span class="at"> content</span></span>
|
||||
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">roles</span><span class="kw">:</span></span>
|
||||
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">user</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"user"</span><span class="kw">]</span></span>
|
||||
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">assistant</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"assistant"</span><span class="kw">]</span></span>
|
||||
<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">system</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"system"</span><span class="kw">]</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Sample input format:</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb16"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"messages"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
|
||||
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"system"</span><span class="fu">,</span></span>
|
||||
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
|
||||
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span></span>
|
||||
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">...</span> <span class="er">more</span> <span class="er">messages</span></span>
|
||||
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="fu">{</span></span>
|
||||
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
|
||||
<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">},</span></span>
|
||||
<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="fu">{</span></span>
|
||||
<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
|
||||
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb16-20"><a href="#cb16-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span></span>
|
||||
<span id="cb16-21"><a href="#cb16-21" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb17"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"messages"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
|
||||
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"system"</span><span class="fu">,</span></span>
|
||||
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
|
||||
<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span></span>
|
||||
<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">...</span> <span class="er">more</span> <span class="er">messages</span></span>
|
||||
<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="fu">{</span></span>
|
||||
<span id="cb17-14"><a href="#cb17-14" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
|
||||
<span id="cb17-15"><a href="#cb17-15" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">},</span></span>
|
||||
<span id="cb17-17"><a href="#cb17-17" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="fu">{</span></span>
|
||||
<span id="cb17-18"><a href="#cb17-18" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
|
||||
<span id="cb17-19"><a href="#cb17-19" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb17-20"><a href="#cb17-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span></span>
|
||||
<span id="cb17-21"><a href="#cb17-21" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="user_defined.default" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="user_defined.default">user_defined.default</h4>
|
||||
<p>For custom behaviors,</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb17"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span></span>
|
||||
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
|
||||
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
|
||||
<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
|
||||
<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
|
||||
<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
|
||||
<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">chosen_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{chosen}"</span></span>
|
||||
<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">rejected_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{rejected}"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb18"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span></span>
|
||||
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
|
||||
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
|
||||
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
|
||||
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
|
||||
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
|
||||
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">chosen_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{chosen}"</span></span>
|
||||
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">rejected_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{rejected}"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>The input format is a simple JSON input with customizable fields based on the above config.</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb18"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb19"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="ipo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="ipo">IPO</h3>
|
||||
<p>As IPO is just DPO with a different loss function, all supported dataset formats for <a href="#dpo">DPO</a> are also supported for IPO.</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ipo</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ipo</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="orpo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="orpo">ORPO</h3>
|
||||
<p>Paper: https://arxiv.org/abs/2403.07691</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> orpo</span></span>
|
||||
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="fu">orpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="fu">chat_template</span><span class="kw">:</span><span class="at"> chatml</span></span>
|
||||
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned</span></span>
|
||||
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.argilla</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb21"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> orpo</span></span>
|
||||
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a><span class="fu">orpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a><span class="fu">chat_template</span><span class="kw">:</span><span class="at"> chatml</span></span>
|
||||
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned</span></span>
|
||||
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.argilla</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>ORPO supports the following types with the following dataset format:</p>
|
||||
<section id="chat_template.argilla" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chat_template.argilla">chat_template.argilla</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb21"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">if</span> <span class="er">available</span><span class="fu">,</span> <span class="er">will</span> <span class="er">be</span> <span class="er">taken</span> <span class="er">as</span> <span class="er">user</span> <span class="er">message</span> <span class="er">for</span> <span class="er">single-turn</span> <span class="er">instead</span> <span class="er">of</span> <span class="er">from</span> <span class="er">list</span> <span class="er">below</span></span>
|
||||
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">chosen/rejected</span> <span class="er">should</span> <span class="er">be</span> <span class="er">same</span> <span class="er">till</span> <span class="er">last</span> <span class="er">content</span> <span class="er">and</span> <span class="er">only</span> <span class="er">even-number</span> <span class="er">of</span> <span class="er">alternating</span> <span class="er">user/assistant</span> <span class="er">turns</span></span>
|
||||
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb22"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">if</span> <span class="er">available</span><span class="fu">,</span> <span class="er">will</span> <span class="er">be</span> <span class="er">taken</span> <span class="er">as</span> <span class="er">user</span> <span class="er">message</span> <span class="er">for</span> <span class="er">single-turn</span> <span class="er">instead</span> <span class="er">of</span> <span class="er">from</span> <span class="er">list</span> <span class="er">below</span></span>
|
||||
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">chosen/rejected</span> <span class="er">should</span> <span class="er">be</span> <span class="er">same</span> <span class="er">till</span> <span class="er">last</span> <span class="er">content</span> <span class="er">and</span> <span class="er">only</span> <span class="er">even-number</span> <span class="er">of</span> <span class="er">alternating</span> <span class="er">user/assistant</span> <span class="er">turns</span></span>
|
||||
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="kto" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="kto">KTO</h3>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span><span class="co"> # default</span></span>
|
||||
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_desirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default</span></span>
|
||||
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_undesirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default</span></span>
|
||||
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned-kto</span></span>
|
||||
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> llama3.ultra</span></span>
|
||||
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing_kwargs</span><span class="kw">:</span></span>
|
||||
<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_reentrant</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb23"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span><span class="co"> # default</span></span>
|
||||
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_desirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default</span></span>
|
||||
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_undesirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default</span></span>
|
||||
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned-kto</span></span>
|
||||
<span id="cb23-10"><a href="#cb23-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> llama3.ultra</span></span>
|
||||
<span id="cb23-11"><a href="#cb23-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb23-12"><a href="#cb23-12" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb23-13"><a href="#cb23-13" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb23-14"><a href="#cb23-14" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing_kwargs</span><span class="kw">:</span></span>
|
||||
<span id="cb23-15"><a href="#cb23-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_reentrant</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>KTO supports the following types with the following dataset format:</p>
|
||||
<section id="chatml.argilla-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla-1">chatml.argilla</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb23"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb24"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chatml.argilla_chat-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla_chat-1">chatml.argilla_chat</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb24"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb24-6"><a href="#cb24-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb24-7"><a href="#cb24-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb24-8"><a href="#cb24-8" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb25"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
|
||||
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb25-7"><a href="#cb25-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb25-8"><a href="#cb25-8" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chatml.intel-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.intel-1">chatml.intel</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb25"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chatml.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs-1">chatml.prompt_pairs</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb26"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="chatml.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.ultra-1">chatml.ultra</h4>
|
||||
<section id="chatml.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs-1">chatml.prompt_pairs</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb27"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.argilla-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla-1">llama3.argilla</h4>
|
||||
<section id="chatml.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.ultra-1">chatml.ultra</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb28"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.argilla-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla-1">llama3.argilla</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb29"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.argilla_chat-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla_chat-1">llama3.argilla_chat</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb29"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb30"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb30-6"><a href="#cb30-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.intel-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.intel-1">llama3.intel</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb30"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs-1">llama3.prompt_pairs</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb31"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.ultra-1">llama3.ultra</h4>
|
||||
<section id="llama3.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs-1">llama3.prompt_pairs</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb32"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="llama3.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.ultra-1">llama3.ultra</h4>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb33"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="user_defined.default-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="user_defined.default-1">user_defined.default</h4>
|
||||
<p>For custom behaviors,</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb33"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span></span>
|
||||
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
|
||||
<span id="cb33-7"><a href="#cb33-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
|
||||
<span id="cb33-8"><a href="#cb33-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_completion</span><span class="kw">:</span><span class="at"> </span><span class="st">"completion"</span></span>
|
||||
<span id="cb33-9"><a href="#cb33-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_label</span><span class="kw">:</span><span class="at"> </span><span class="st">"label"</span></span>
|
||||
<span id="cb33-10"><a href="#cb33-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
|
||||
<span id="cb33-11"><a href="#cb33-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">completion_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{completion}"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb34"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span></span>
|
||||
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
|
||||
<span id="cb34-7"><a href="#cb34-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
|
||||
<span id="cb34-8"><a href="#cb34-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_completion</span><span class="kw">:</span><span class="at"> </span><span class="st">"completion"</span></span>
|
||||
<span id="cb34-9"><a href="#cb34-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_label</span><span class="kw">:</span><span class="at"> </span><span class="st">"label"</span></span>
|
||||
<span id="cb34-10"><a href="#cb34-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
|
||||
<span id="cb34-11"><a href="#cb34-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">completion_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{completion}"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>The input format is a simple JSON input with customizable fields based on the above config.</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb34"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"label"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb35"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"label"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="grpo" class="level3">
|
||||
@@ -1040,25 +1054,25 @@ Important
|
||||
<p>Make sure you’ve installed the correct version of vLLM by including it as an extra when installing axolotl, e.g. <code>pip install axolotl[vllm]</code>.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb35"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> Qwen/Qwen2.5-1.5B-Instruct</span></span>
|
||||
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a><span class="fu">vllm</span><span class="kw">:</span></span>
|
||||
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">host</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0.0.0</span></span>
|
||||
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">port</span><span class="kw">:</span><span class="at"> </span><span class="dv">8000</span></span>
|
||||
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">tensor_parallel_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">gpu_memory_utilization</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.85</span></span>
|
||||
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">dtype</span><span class="kw">:</span><span class="at"> auto</span></span>
|
||||
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a><span class="co"> # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand</span></span>
|
||||
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||||
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb35-13"><a href="#cb35-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb35-14"><a href="#cb35-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_host</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0.0.0</span></span>
|
||||
<span id="cb35-15"><a href="#cb35-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_port</span><span class="kw">:</span><span class="at"> </span><span class="dv">8000</span></span>
|
||||
<span id="cb35-16"><a href="#cb35-16" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_timeout</span><span class="kw">:</span><span class="at"> </span><span class="dv">300</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb36"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>2,3 <span class="ex">axolotl</span> vllm-serve grpo.yaml</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb36"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> Qwen/Qwen2.5-1.5B-Instruct</span></span>
|
||||
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a><span class="fu">vllm</span><span class="kw">:</span></span>
|
||||
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">host</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0.0.0</span></span>
|
||||
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">port</span><span class="kw">:</span><span class="at"> </span><span class="dv">8000</span></span>
|
||||
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">tensor_parallel_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb36-7"><a href="#cb36-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">gpu_memory_utilization</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.85</span></span>
|
||||
<span id="cb36-8"><a href="#cb36-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">dtype</span><span class="kw">:</span><span class="at"> auto</span></span>
|
||||
<span id="cb36-9"><a href="#cb36-9" aria-hidden="true" tabindex="-1"></a><span class="co"> # max_model_len: # you may find it useful to set the vLLM model context length if you know this beforehand</span></span>
|
||||
<span id="cb36-10"><a href="#cb36-10" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb36-11"><a href="#cb36-11" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||||
<span id="cb36-12"><a href="#cb36-12" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb36-13"><a href="#cb36-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb36-14"><a href="#cb36-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_host</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0.0.0</span></span>
|
||||
<span id="cb36-15"><a href="#cb36-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_port</span><span class="kw">:</span><span class="at"> </span><span class="dv">8000</span></span>
|
||||
<span id="cb36-16"><a href="#cb36-16" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_timeout</span><span class="kw">:</span><span class="at"> </span><span class="dv">300</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb37"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>2,3 <span class="ex">axolotl</span> vllm-serve grpo.yaml</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Your <code>vLLM</code> instance will now attempt to spin up, and it’s time to kick off training utilizing our remaining two GPUs. In another terminal, execute:</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb37"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>0,1 <span class="ex">axolotl</span> train grpo.yaml <span class="at">--num-processes</span> 2</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb38"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>0,1 <span class="ex">axolotl</span> train grpo.yaml <span class="at">--num-processes</span> 2</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="callout callout-style-default callout-note callout-titled">
|
||||
<div class="callout-header d-flex align-content-center">
|
||||
<div class="callout-icon-container">
|
||||
@@ -1076,69 +1090,69 @@ Note
|
||||
<h4 class="anchored" data-anchor-id="reward-functions">Reward functions</h4>
|
||||
<p>GRPO uses custom reward functions and transformations. Please have them ready locally.</p>
|
||||
<p>For example, to load OpenAI’s GSM8K and use a random reward for completions:</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb38"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># rewards.py</span></span>
|
||||
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> random</span>
|
||||
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> rand_reward_func(completions, <span class="op">**</span>kwargs) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">float</span>]:</span>
|
||||
<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> [random.uniform(<span class="dv">0</span>, <span class="dv">1</span>) <span class="cf">for</span> _ <span class="kw">in</span> completions]</span>
|
||||
<span id="cb38-6"><a href="#cb38-6" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb38-7"><a href="#cb38-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> oai_gsm8k_transform(cfg, <span class="op">*</span>args, <span class="op">**</span>kwargs):</span>
|
||||
<span id="cb38-8"><a href="#cb38-8" aria-hidden="true" tabindex="-1"></a> <span class="kw">def</span> transform_fn(example, tokenizer<span class="op">=</span><span class="va">None</span>):</span>
|
||||
<span id="cb38-9"><a href="#cb38-9" aria-hidden="true" tabindex="-1"></a> label <span class="op">=</span> example[<span class="st">"answer"</span>].split(<span class="st">"####"</span>)[<span class="op">-</span><span class="dv">1</span>].strip().replace(<span class="st">","</span>, <span class="st">""</span>)</span>
|
||||
<span id="cb38-10"><a href="#cb38-10" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {</span>
|
||||
<span id="cb38-11"><a href="#cb38-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"prompt"</span>: [{<span class="st">"role"</span>: <span class="st">"user"</span>, <span class="st">"content"</span>: example[<span class="st">"question"</span>]},],</span>
|
||||
<span id="cb38-12"><a href="#cb38-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"answer"</span>: label,</span>
|
||||
<span id="cb38-13"><a href="#cb38-13" aria-hidden="true" tabindex="-1"></a> }</span>
|
||||
<span id="cb38-14"><a href="#cb38-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> transform_fn, {<span class="st">"remove_columns"</span>: [<span class="st">"question"</span>]}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb39"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||||
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb39-3"><a href="#cb39-3" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb39-4"><a href="#cb39-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.001</span></span>
|
||||
<span id="cb39-5"><a href="#cb39-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">256</span></span>
|
||||
<span id="cb39-6"><a href="#cb39-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">True</span></span>
|
||||
<span id="cb39-7"><a href="#cb39-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generations</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
|
||||
<span id="cb39-8"><a href="#cb39-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_funcs</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"rewards.rand_reward_func"</span><span class="kw">]</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span>
|
||||
<span id="cb39-9"><a href="#cb39-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_weights</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">1.0</span><span class="kw">]</span></span>
|
||||
<span id="cb39-10"><a href="#cb39-10" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb39-11"><a href="#cb39-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> openai/gsm8k</span></span>
|
||||
<span id="cb39-12"><a href="#cb39-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">name</span><span class="kw">:</span><span class="at"> main</span></span>
|
||||
<span id="cb39-13"><a href="#cb39-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> rewards.oai_gsm8k_transform</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb39"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a><span class="co"># rewards.py</span></span>
|
||||
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> random</span>
|
||||
<span id="cb39-3"><a href="#cb39-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb39-4"><a href="#cb39-4" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> rand_reward_func(completions, <span class="op">**</span>kwargs) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">float</span>]:</span>
|
||||
<span id="cb39-5"><a href="#cb39-5" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> [random.uniform(<span class="dv">0</span>, <span class="dv">1</span>) <span class="cf">for</span> _ <span class="kw">in</span> completions]</span>
|
||||
<span id="cb39-6"><a href="#cb39-6" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb39-7"><a href="#cb39-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> oai_gsm8k_transform(cfg, <span class="op">*</span>args, <span class="op">**</span>kwargs):</span>
|
||||
<span id="cb39-8"><a href="#cb39-8" aria-hidden="true" tabindex="-1"></a> <span class="kw">def</span> transform_fn(example, tokenizer<span class="op">=</span><span class="va">None</span>):</span>
|
||||
<span id="cb39-9"><a href="#cb39-9" aria-hidden="true" tabindex="-1"></a> label <span class="op">=</span> example[<span class="st">"answer"</span>].split(<span class="st">"####"</span>)[<span class="op">-</span><span class="dv">1</span>].strip().replace(<span class="st">","</span>, <span class="st">""</span>)</span>
|
||||
<span id="cb39-10"><a href="#cb39-10" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {</span>
|
||||
<span id="cb39-11"><a href="#cb39-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"prompt"</span>: [{<span class="st">"role"</span>: <span class="st">"user"</span>, <span class="st">"content"</span>: example[<span class="st">"question"</span>]},],</span>
|
||||
<span id="cb39-12"><a href="#cb39-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"answer"</span>: label,</span>
|
||||
<span id="cb39-13"><a href="#cb39-13" aria-hidden="true" tabindex="-1"></a> }</span>
|
||||
<span id="cb39-14"><a href="#cb39-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> transform_fn, {<span class="st">"remove_columns"</span>: [<span class="st">"question"</span>]}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb40"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||||
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.001</span></span>
|
||||
<span id="cb40-5"><a href="#cb40-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">256</span></span>
|
||||
<span id="cb40-6"><a href="#cb40-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">True</span></span>
|
||||
<span id="cb40-7"><a href="#cb40-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generations</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
|
||||
<span id="cb40-8"><a href="#cb40-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_funcs</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"rewards.rand_reward_func"</span><span class="kw">]</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span>
|
||||
<span id="cb40-9"><a href="#cb40-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_weights</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">1.0</span><span class="kw">]</span></span>
|
||||
<span id="cb40-10"><a href="#cb40-10" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb40-11"><a href="#cb40-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> openai/gsm8k</span></span>
|
||||
<span id="cb40-12"><a href="#cb40-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">name</span><span class="kw">:</span><span class="at"> main</span></span>
|
||||
<span id="cb40-13"><a href="#cb40-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> rewards.oai_gsm8k_transform</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>To see other examples of custom reward functions, please see <a href="https://github.com/huggingface/trl/blob/main/docs/source/grpo_trainer.md#using-a-custom-reward-function">TRL GRPO Docs</a>.</p>
|
||||
<p>To see all configs, please see <a href="https://github.com/axolotl-ai-cloud/axolotl/blob/v0.9.2/src/axolotl/utils/schemas/trl.py">TRLConfig</a>.</p>
|
||||
</section>
|
||||
<section id="grpo-with-dapodr.-grpo-loss" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="grpo-with-dapodr.-grpo-loss">GRPO with DAPO/Dr. GRPO loss</h4>
|
||||
<p>The DAPO paper and subsequently Dr. GRPO paper proposed an alternative loss function for GRPO to remediate the penalty in longer responses.</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb40"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb40-2"><a href="#cb40-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">loss_type</span><span class="kw">:</span><span class="at"> dr_grpo</span></span>
|
||||
<span id="cb40-3"><a href="#cb40-3" aria-hidden="true" tabindex="-1"></a><span class="co"> # Normalizes loss based on max completion length (default: 256)</span></span>
|
||||
<span id="cb40-4"><a href="#cb40-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb41"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">loss_type</span><span class="kw">:</span><span class="at"> dr_grpo</span></span>
|
||||
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a><span class="co"> # Normalizes loss based on max completion length (default: 256)</span></span>
|
||||
<span id="cb41-4"><a href="#cb41-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>For more information, see <a href="https://huggingface.co/docs/trl/v0.17.0/en/grpo_trainer#loss-types">GRPO docs</a>.</p>
|
||||
</section>
|
||||
</section>
|
||||
<section id="simpo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="simpo">SimPO</h3>
|
||||
<p>SimPO uses <a href="https://huggingface.co/docs/trl/main/en/cpo_trainer">CPOTrainer</a> but with alternative loss function.</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb41"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> simpo</span></span>
|
||||
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span><span class="co"> # default in CPOTrainer</span></span>
|
||||
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a><span class="fu">cpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default in CPOTrainer</span></span>
|
||||
<span id="cb41-4"><a href="#cb41-4" aria-hidden="true" tabindex="-1"></a><span class="fu">simpo_gamma</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span><span class="co"> # default in CPOTrainer</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb42"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> simpo</span></span>
|
||||
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span><span class="co"> # default in CPOTrainer</span></span>
|
||||
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a><span class="fu">cpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # default in CPOTrainer</span></span>
|
||||
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a><span class="fu">simpo_gamma</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span><span class="co"> # default in CPOTrainer</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>This method uses the same dataset format as <a href="#dpo">DPO</a>.</p>
|
||||
</section>
|
||||
<section id="using-local-dataset-files" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="using-local-dataset-files">Using local dataset files</h3>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb42"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span></span>
|
||||
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">data_files</span><span class="kw">:</span></span>
|
||||
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> orca_rlhf.jsonl</span></span>
|
||||
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb43"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span></span>
|
||||
<span id="cb43-3"><a href="#cb43-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">data_files</span><span class="kw">:</span></span>
|
||||
<span id="cb43-4"><a href="#cb43-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> orca_rlhf.jsonl</span></span>
|
||||
<span id="cb43-5"><a href="#cb43-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb43-6"><a href="#cb43-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="trl-auto-unwrapping-for-peft" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</h3>
|
||||
<p>TRL supports auto-unwrapping PEFT models for RL training paradigms which rely on a reference model. This significantly reduces memory pressure as an additional refreference model does not need to be loaded, and reference model log-probabilities can be obtained by disabling PEFT adapters. This is enabled by default. To turn it off, pass the following config:</p>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb43"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="co"># load ref model when adapter training.</span></span>
|
||||
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_adapter_ref_model</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb44"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="co"># load ref model when adapter training.</span></span>
|
||||
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_adapter_ref_model</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
11
search.json
11
search.json
File diff suppressed because one or more lines are too long
398
sitemap.xml
398
sitemap.xml
@@ -2,798 +2,798 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.499Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.737Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/mac.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/cli.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/nccl.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/getting-started.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.712Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/lr_groups.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/qat.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multipack.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/streaming.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/lora_optims.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/amd_hpc.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.472Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/debugging.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/conversation.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/inst_tune.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/index.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/config-reference.html</loc>
|
||||
<lastmod>2025-10-17T03:38:56.163Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:28.425Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multimodal.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/ray-integration.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/faq.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset_preprocessing.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/torchao.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/optimizers.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schedulers.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.077Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.283Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.sweeps.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.279Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.484Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/datasets.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.870Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.081Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.tokenization.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.993Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.199Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.tokenizer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.397Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.601Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_expand_mask.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.866Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.071Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_cpu.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.952Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.158Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.data.sft.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.128Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.333Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.transformers_fa_utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.933Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.139Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.patch_manager.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.418Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.622Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.liger.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.452Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.654Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.peft.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.218Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.423Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.pygmalion.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.612Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.813Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_instruct.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.530Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.732Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.base.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.235Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.440Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.gradient_checkpointing.offload_disk.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.985Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.190Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.swiglu.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.836Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.042Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.cut_cross_entropy.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.437Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.639Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.user_defined.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.673Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.880Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.911Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.116Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.rl.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.970Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.180Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.processor.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.399Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.603Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.lisa.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.584Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.785Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.training_args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.986Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.195Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.adapter.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.406Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.609Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.merge_sharded_fsdp_weights.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.206Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.411Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.train.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.089Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.296Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.rng_state_loader.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.431Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.634Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.completion.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.578Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.779Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.stepwise_supervised.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.591Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.792Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.lora_kernels.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.901Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.106Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.messages.chat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.617Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.819Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.user_defined.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.555Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.757Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.messages.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.015Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.224Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.scheduler.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.439Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.642Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.user_defined.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.649Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.856Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.llama3.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.661Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.868Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.integrations.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.244Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.449Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/convert.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.887Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.098Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.passthrough.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.651Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.858Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.config.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.167Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.373Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.enums.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.254Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.459Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.btlm_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.912Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.118Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chat_template.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.619Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.827Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.trainer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.358Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.562Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.lm_eval.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.456Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.658Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.core.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.486Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.687Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.shared.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.020Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.229Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orpo.chat_template.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.699Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.905Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.samplers.multipack.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.570Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.770Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.qat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.601Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.802Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.chat_template.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.511Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.multimodal.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.228Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.433Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.comet_.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.593Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.794Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.base.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.470Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.673Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.847Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.053Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.merge_lora.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.192Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.397Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.245Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.449Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.ctx_managers.sequence_parallel.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.468Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.671Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/index.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.772Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:10.982Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.llama3.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.632Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.840Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mixtral.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.948Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.154Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.orcamini.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.604Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.805Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.grpo.sampler.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.373Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.577Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.lora.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.001Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.207Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mixins.optimizer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.427Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.630Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.config.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.158Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.364Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.multipack.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.860Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.065Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.batching.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.510Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.710Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.quantization.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.152Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.357Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.dict.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.108Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.314Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.quantize.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.845Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.051Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.training.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.185Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.390Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/train.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.850Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.061Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.datasets.transforms.chat_builder.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.036Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.245Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/inference.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/FAQS.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.471Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.709Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/examples/colab-notebooks/colab-axolotl-example.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.481Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.719Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/index.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.494Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.732Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/custom_integrations.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.262Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.466Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.geglu.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.823Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.029Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.causal.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.964Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.174Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.mamba.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.337Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.541Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.bradley_terry.llama3.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.703Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.910Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.datasets.chat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.027Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.236Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mm_chat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.520Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.720Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.llama2_chat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.571Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.772Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.const.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.464Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.666Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.quantize.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.223Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.427Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.trainer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.043Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.249Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.delinearize_llama4.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.164Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.369Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/evaluate.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.863Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.074Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.mistral_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.858Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.063Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.model.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.387Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.591Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.distributed.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.102Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.308Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.model_shard_quant.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.008Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.213Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/kernels.lora.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.810Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.017Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.main.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.078Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.286Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.spectrum.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.461Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.662Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.optimizers.adopt.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.118Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.324Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.cloud.modal_.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.243Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.448Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.854Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.060Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.builders.base.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.958Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.169Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.trl.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.222Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.427Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.259Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.463Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.base.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.312Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.516Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_patch_multipack.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.914Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.120Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.llama_attn_hijack_xformers.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.856Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.061Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.model.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.176Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.381Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.kto.chatml.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.671Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.878Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.mlflow_.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.589Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.789Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.datasets.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.483Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.684Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.schemas.datasets.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.207Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.412Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.fetch.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.265Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.470Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.chatml.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.645Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.853Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.relora.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.864Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.070Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.evaluate.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.099Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.306Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.dpo.zephyr.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.647Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.855Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.utils.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.375Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.579Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_w_system.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.544Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.747Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.chat_templates.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.995Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.200Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.data.streaming.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.120Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.326Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.bench.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.012Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.218Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/common.architectures.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.463Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.664Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.checks.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.137Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.342Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.dpo.trainer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.345Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.549Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.base.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.433Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.635Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.train.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.294Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.498Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.collators.mamba.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.514Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.art.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.129Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.334Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.trainer_fsdp_optim.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.925Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.131Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/logging_config.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.950Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.161Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.freeze.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.022Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.227Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.metharme.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.599Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.801Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.alpaca_chat.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.528Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.730Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.stablelm_attn_hijack_flash.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.921Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.127Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/models.mamba.modeling_mamba.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.484Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.685Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.trainers.trl.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.330Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.534Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_strategies.input_output.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.585Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.787Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/loaders.constants.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.420Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.623Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.data.batch_dataset_fetcher.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.946Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.152Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.vllm_serve.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.231Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.436Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/prompt_tokenizers.html</loc>
|
||||
<lastmod>2025-10-17T03:38:38.938Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.149Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.args.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.124Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.330Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.inference.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.182Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.387Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.utils.load.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.272Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.477Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/cli.preprocess.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.217Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.421Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.profiler.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.582Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.783Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/utils.callbacks.perplexity.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.578Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.778Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.chatml.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.017Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.226Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.grokfast.optimizer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.439Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.640Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/integrations.kd.trainer.html</loc>
|
||||
<lastmod>2025-10-17T03:38:40.448Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.650Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/monkeypatch.unsloth_.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.935Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:12.141Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/api/core.chat.format.llama3x.html</loc>
|
||||
<lastmod>2025-10-17T03:38:39.019Z</lastmod>
|
||||
<lastmod>2025-10-17T10:04:11.228Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/reward_modelling.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/quantize.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/fsdp_qlora.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/nd_parallelism.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/batch_vs_grad.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.472Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multi-node.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/rlhf.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/stepwise_supervised.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/pretraining.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/tokenized.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset-formats/template_free.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/multi-gpu.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/input_output.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/docker.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/gradient_checkpointing.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.712Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/optimizations.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/sequence_parallelism.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/dataset_loading.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.473Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.711Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/installation.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/mixed_precision.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.476Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.714Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/docs/unsloth.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.477Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.715Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://docs.axolotl.ai/src/axolotl/integrations/LICENSE.html</loc>
|
||||
<lastmod>2025-10-17T03:35:11.499Z</lastmod>
|
||||
<lastmod>2025-10-17T10:00:35.736Z</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
Reference in New Issue
Block a user