From 77764de0dd5002c922bd511c08490bd70ce557ca Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Thu, 13 Feb 2025 21:02:41 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- docs/dataset-formats/conversation.html | 4 +- docs/dataset-formats/index.html | 590 ++++++++++++++++----- docs/rlhf.html | 513 ++++++++++++++++-- listings.json | 13 - search.json | 102 +++- site_libs/quarto-listing/list.min.js | 2 - site_libs/quarto-listing/quarto-listing.js | 254 --------- sitemap.xml | 76 +-- 9 files changed, 1052 insertions(+), 504 deletions(-) delete mode 100644 listings.json delete mode 100644 site_libs/quarto-listing/list.min.js delete mode 100644 site_libs/quarto-listing/quarto-listing.js diff --git a/.nojekyll b/.nojekyll index 51f17ebc2..057f00e37 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -e7966439 \ No newline at end of file +64f14fad \ No newline at end of file diff --git a/docs/dataset-formats/conversation.html b/docs/dataset-formats/conversation.html index 2f74554a8..769cdcf85 100644 --- a/docs/dataset-formats/conversation.html +++ b/docs/dataset-formats/conversation.html @@ -368,7 +368,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin

sharegpt

-

IMPORTANT: ShareGPT is deprecated! Please see the chat_template section below.

+

IMPORTANT: ShareGPT is deprecated! Please see the chat_template section below.

pygmalion

@@ -388,7 +388,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
{"conversations": [{"role": "...", "content": "..."}]}
-

See config.qmd for full configs and supported templates.

+

See configs for full configs and supported templates.

Migrating from sharegpt

Most configs can be adapted as follows:

diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html index 80b5cad80..b622b019c 100644 --- a/docs/dataset-formats/index.html +++ b/docs/dataset-formats/index.html @@ -6,7 +6,7 @@ - + Dataset Formats – Axolotl @@ -31,8 +65,6 @@ ul.task-list li input[type="checkbox"] { - - @@ -69,65 +101,7 @@ ul.task-list li input[type="checkbox"] { "search-label": "Search" } } - - - - - - @@ -350,8 +324,46 @@ window.Quarto = {
- +
+

For pre-training only, Axolotl splits texts that exceed the context length into multiple smaller prompts.

+
+ +
+

Pre-training from Hugging Face hub datasets

+

As an example, to train using a Hugging Face dataset hf_org/name, you can pass the following config:

+
pretraining_dataset: hf_org/name
+
+
+

Pre-training from local dataset files

+

Given a few corpus files: A.jsonl, B.jsonl, and C.jsonl, your config will look like the following:

+
pretraining_dataset:
+  - path: json
+    data_files:
+      - A.jsonl
+      - B.jsonl
+      - C.jsonl
+

While we recommend .jsonl, you can also use the other formats (csv, parquet, arrow, SQL, WebDataset) that are supported by datasets.load_dataset

+
+
+

Pre-training without streaming

+

In the rare case that the dataset is small and can be loaded entirely into memory, another approach to pre-training is to use the completion format. This means the entire dataset is pre-tokenized up front rather than tokenized on demand while streaming.

+

One benefit of this is that the tokenization can be performed separately on a CPU-only machine, and then transferred to a GPU machine for training to save costs.

+

From Hugging Face:

+
datasets:
+  - path: hf_org/name
+    type: completion
+

From local files (either example works):

+
datasets:
+  - path: A.jsonl
+    type: completion
+
+  - path: json
+    data_files: ["A.jsonl", "B.jsonl", "C.jsonl"]
+    type: completion
+
+
+

Pre-training dataset configuration tips

+
+

Setting max_steps

+

When using streaming for large datasets, Axolotl does not know in advance how large the dataset is and does not know when to stop.

+

Therefore, it is necessary to set max_steps: int in your config for pre-training to run, so that Axolotl knows when to stop training.

+

One step is equal to sequence_len * micro_batch_size * gradient_accumulation_steps * total_num_gpus tokens.
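As a back-of-envelope sketch of this arithmetic (all parameter values below are hypothetical examples, not recommendations):

```python
# Helper for choosing max_steps from a target token budget, per the
# formula above. All concrete numbers here are made-up examples.

def tokens_per_step(sequence_len, micro_batch_size,
                    gradient_accumulation_steps, total_num_gpus):
    """Tokens consumed by one optimizer step."""
    return (sequence_len * micro_batch_size
            * gradient_accumulation_steps * total_num_gpus)

def steps_for_budget(token_budget, **kwargs):
    """max_steps needed to roughly cover a target number of tokens."""
    return token_budget // tokens_per_step(**kwargs)

per_step = tokens_per_step(sequence_len=2048, micro_batch_size=2,
                           gradient_accumulation_steps=4, total_num_gpus=8)
print(per_step)  # 131072 tokens per step
print(steps_for_budget(1_000_000_000,
                       sequence_len=2048, micro_batch_size=2,
                       gradient_accumulation_steps=4, total_num_gpus=8))
```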

+
+
+

Group_by_length

+

It is recommended to leave this off when streaming from the Hugging Face hub, as it would require downloading the entire dataset, which can be very large.

+
+
+
+
+

Supervised fine-tuning (SFT)

+

Supervised fine-tuning is the process of training models to respond to an instruction or chat input.

+

As there are a wide variety of dataset formats, Axolotl tries to support a majority of the formats available in public datasets.

+

Axolotl provides four approaches for loading datasets; however, it is easier to work backwards from the dataset you have available to figure out which approach to use.

+

A flow chart is as follows:

+
  1. Do you already have the dataset tokenized? If yes, check Pre-Tokenized Dataset.
  2. Do you want to format the dataset yourself and manually choose each section to mask? If yes, check Template Free Dataset.
  3. Is your dataset in a “conversation” format, containing a list[messages]? If yes, check Conversation Dataset.
  4. Is your dataset in an “instruct” format, containing { instruction, response }? If yes, check Instruction Dataset.
+

If you went through the flow chart and did not find a format that matches, it is recommended to preprocess your dataset into one of the above or to open a GitHub Discussion.

+
+
+
+ +
+
+Tip +
+
+
+

You can mix and match within each approach or across approaches to train a model on a variety of datasets.

+
+
+
+

Pre-Tokenized Dataset

+

We suggest this approach when you want to bring your own tokenized dataset.

+

Axolotl expects the dataset to have three keys:
- input_ids: from tokenizing the formatted prompt
- attention_mask: for masking padding; if you don’t add padding, it is equal to len(input_ids) * [1]
- labels: the same as input_ids, except that any tokens you want to mask should have their labels set to -100
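A minimal sketch of one such pre-tokenized row; the token ids below are made up for illustration:

```python
# One pre-tokenized row with the three expected keys.
# Prompt tokens are masked with -100; response tokens keep their ids as labels.
# All ids here are hypothetical placeholders, not from a real tokenizer.
prompt_ids = [1, 15043]      # e.g. BOS + a prompt token (made-up ids)
response_ids = [22172, 2]    # e.g. a response token + EOS (made-up ids)

input_ids = prompt_ids + response_ids
attention_mask = [1] * len(input_ids)             # no padding, so all ones
labels = [-100] * len(prompt_ids) + response_ids  # mask the prompt portion

row = {"input_ids": input_ids,
       "attention_mask": attention_mask,
       "labels": labels}
print(row)
```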

+
+
+
+ +
+
+Tip +
+
+
+

Make sure to add BOS/EOS tokens to your prompt and mask it appropriately.

+
+
+

A config for this would look like:

+
datasets:
+  - path: A.jsonl
+    type:
+
+
+
+ +
+
+Note +
+
+
+

type: is empty!

+
+
+
+
+

Template Free Dataset

+

We recommend this approach when you want granular control over the prompt formatting, special tokens, and masking, whilst letting Axolotl handle the tokenization. This is very useful if your dataset has unique prompts that differ across samples, where one single general template wouldn’t suffice.

+

In the example below, you can see that there is no fixed structure. At the same time, it’s very flexible, as there are no constraints on how your prompt can look.

+
{
+    "segments": [
+        {
+            "label": true,
+            "text": "<s>Hello\n"
+        },
+        {
+            "label": true,
+            "text": "hi there!. "
+        },
+        {
+            "label": false,
+            "text": "goodbye "
+        },
+        {
+            "label": true,
+            "text": "farewell</s>"
+        }
+    ]
+}
+

Each prompt must have a key called segments, which is a list of { text, label } objects.

+
datasets:
+  - path: A.jsonl
+    type: input_output
+
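As a sketch of how the label flags drive masking, using a stand-in tokenizer (one made-up id per word) rather than a real one; encode_segments is a hypothetical helper, not Axolotl's implementation:

```python
# Sketch: how label=true/false in a segments row translates to masking.
# tokenize() is a stand-in that yields one made-up id per word.
def tokenize(text):
    return [hash(w) % 1000 for w in text.split()]  # placeholder ids

def encode_segments(segments):
    input_ids, labels = [], []
    for seg in segments:
        ids = tokenize(seg["text"])
        input_ids.extend(ids)
        # label=false segments are masked out with -100
        labels.extend(ids if seg["label"] else [-100] * len(ids))
    return input_ids, labels

segments = [
    {"label": True,  "text": "<s>Hello\n"},
    {"label": False, "text": "goodbye "},
    {"label": True,  "text": "farewell</s>"},
]
ids, labels = encode_segments(segments)
print(labels.count(-100))  # 1: only the "goodbye" segment is masked
```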
+
+

Conversation Dataset

+

Conversation messages are a list of messages, each of which usually contains a role and a content key.

+
+
+
+ +
+
+Tip +
+
+
+

Fun fact: Axolotl refers to “chat” messages and conversation messages interchangeably because FastChat popularized the term through its widely used conversation method for formatting chat messages, which predates chat_templates.

+
+
+
+

What are chat_templates?

+

The current most popular and convenient method for inference is to use chat_templates for formatting prompts. Axolotl supports using chat_templates for training to ensure that the model performs in the same environment as in inference.

+

Here’s a quick rundown on chat_template: a chat_template is a Jinja2 template that formats a list of messages into a prompt.

+

An example of a prompt formatted into a popular template called ChatML can be seen below:

+

Single prompt (pretty-printed):

+
{
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hi"
+        },
+        {
+            "role": "assistant",
+            "content": "How can I help you?"
+        },
+        {
+            "role": "user",
+            "content": "Can you add 3+5?"
+        },
+        {
+            "role": "assistant",
+            "content": "The answer is 8."
+        }
+    ]
+}
+

The ChatML template is as follows:

+
{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}
+

The above prompt formatted into this template will result in:

+
<|im_start|>user
+Hi<|im_end|>
+<|im_start|>assistant
+How can I help you?<|im_end|>
+<|im_start|>user
+Can you add 3+5?<|im_end|>
+<|im_start|>assistant
+The answer is 8.<|im_end|>
+

By using delimiters (<|im_start|> and <|im_end|>), the template separates the different speakers, which helps the model identify which portion of the prompt belongs to whom.

+
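As a sketch, the ChatML formatting above can be reproduced in plain Python (without Jinja2); to_chatml is a hypothetical helper mirroring the template, not an Axolotl API:

```python
# Plain-Python equivalent of the ChatML Jinja2 template shown above.
def to_chatml(messages, add_generation_prompt=False):
    prompt = ""
    for m in messages:
        prompt += "<|im_start|>" + m["role"] + "\n" + m["content"] + "<|im_end|>" + "\n"
    if add_generation_prompt:
        # Open an assistant turn for inference-time generation.
        prompt += "<|im_start|>assistant\n"
    return prompt

messages = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "How can I help you?"},
]
print(to_chatml(messages))
```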
+
+

Common Conversation Dataset formats

+

Older conversation datasets with the following format are colloquially called sharegpt datasets.

+
{"conversations": [{"from": "...", "value": "..."}]}
+

Newer conversation datasets usually follow the OpenAI format.

+
{"messages": [{"role": "...", "content": "..."}]}
+

Axolotl supports both formats, and also allows customizing any of the keys.

+
+
+

Chat Template Usage

+

To properly use this method, it is important to identify three things:

+
  1. Which chat_template will you use?
  2. What are the keys in your dataset, and what are the possible roles? For example, in OpenAI format, the keys are messages, role, and content, and the possible roles are system, user, and assistant.
  3. What do you want to mask? For instance, only assistant messages, only the last message, or nothing.
+
+
Choosing a chat_template
+

There are a lot of chat_templates out there. Axolotl supports the common ones: supported chat templates. For example, to use ChatML, it would be chat_template: chatml.

+

However, it is also possible to use the already configured template within the tokenizer by specifying chat_template: tokenizer_default. If you want a fallback (in case some tokenizer does not have it pre-configured), you can do chat_template: tokenizer_default_fallback_chatml to fallback to the ChatML template if a tokenizer template was not found.

+

One last but powerful approach is to bring your own template. This can be set via:

+
chat_template_jinja: # your template
+
+
+
Setting chat_template dataset keys
+

We currently default to OpenAI format for dataset keys, so if that’s your current dataset format, there’s nothing to do here.

+

If your dataset format is different, here are the keys you should check (with their defaults):

+
datasets:
+    ...
+    field_messages: messages
+    message_field_role: role
+    message_field_content: content
+

In some chat_templates (e.g. Gemma), the roles are hardcoded to user and assistant. Consequently, you may need to map the roles in your dataset to these. We currently have defaults that should work for common datasets, but if you get a KeyError, you will need to add a mapping for your roles. Here is an example of how that would look:

+
datasets:
+    ...
+    roles:
+      assistant:
+        - gpt
+        - model
+      user:
+        - human
+

In the example above, all gpt and model values are converted to assistant. All human values are converted to user.
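The mapping logic can be sketched as follows; normalize is a hypothetical helper for illustration, not an Axolotl API:

```python
# Sketch of the role normalization implied by the roles: mapping above.
roles = {"assistant": ["gpt", "model"], "user": ["human"]}

# Invert to raw-role -> canonical-role.
alias_to_role = {alias: canon
                 for canon, aliases in roles.items()
                 for alias in aliases}

def normalize(message):
    raw = message["role"]
    if raw not in alias_to_role and raw not in roles:
        raise KeyError(raw)  # roughly the KeyError mentioned above
    return {**message, "role": alias_to_role.get(raw, raw)}

print(normalize({"role": "gpt", "content": "hello"})["role"])   # assistant
print(normalize({"role": "human", "content": "hi"})["role"])    # user
```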

+
+
+
Handling masking
+

The common use case for chat_template is chat messages; therefore, it is common to mask all non-assistant messages. Assistant messages are the bot messages that you want the model to learn from.

+

To train on all assistant messages, you would set the following configs.

+
datasets:
+    ...
+    roles_to_train: ["assistant"]
+    train_on_eos: "turn"
+

With train_on_eos: turn, the EOS token is trained on only after assistant turns and masked after all other turns. The other options, all and last, choose which EOS tokens to train on.

+

If you want to train on both assistant and narrator roles, simply add narrator to roles_to_train. You would also need to add it to the roles mapping above.

+
datasets:
+    ...
+    roles_to_train: ["assistant", "narrator"]
+    roles:
+      assistant:
+        - gpt
+        - model
+      user:
+        - human
+      narrator: ["narrator"]
+
+
+
+

Applying chat_template

+

Once all the above steps are completed, you could combine all these configs together to form a bespoke configuration for your custom dataset. The final step would be to correctly set the EOS token in your config:

+
datasets:
+  - path: A.jsonl
+    type: chat_template
+
+    # step 1
+    chat_template: chatml
+
+    # step 2
+    field_messages: messages
+    message_field_role: role
+    message_field_content: content
+
+    roles:
+      assistant:
+        - gpt
+        - model
+        - assistant
+      user:
+        - human
+        - user
+
+    # step 3
+    roles_to_train: ["assistant"]
+    train_on_eos: "turn"
+
+special_tokens:
+  eos_token: <|im_end|>
+

If this config were to be applied to the sample dataset above, the output would look as such (which can be retrieved via axolotl preprocess config.yaml --debug):

+
<|im_start|>(-100, 128256) user(-100, 882)
+(-100, 198) Hi(-100, 13347) <|im_end|>(-100, 128257)
+(-100, 198) <|im_start|>(-100, 128256) assistant(-100, 78191)
+(-100, 198) How(4438, 4438)  can(649, 649)  I(358, 358)  help(1520, 1520)  you(499, 499) ?(30, 30) <|im_end|>(128257, 128257)
+(-100, 198) <|im_start|>(-100, 128256) user(-100, 882)
+(-100, 198) Can(-100, 6854)  you(-100, 499)  add(-100, 923)  (-100, 220) 3(-100, 18) +(-100, 10) 5(-100, 20) ?(-100, 30) <|im_end|>(-100, 128257)
+(-100, 198) <|im_start|>(-100, 128256) assistant(-100, 78191)
+(-100, 198) The(791, 791)  answer(4320, 4320)  is(374, 374)  (220, 220) 8(23, 23) .(13, 13) <|im_end|>(128257, 128257)
+(-100, 198)
+

The first number is the label and the second is the token_id. For example, -100 labels appear on non-assistant portions, meaning they are masked during training. For assistant portions, the label is the same as the token_id.
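A sketch of how to interpret a (label, token_id) stream like the debug output above; the pairs below are copied from a few tokens of the sample:

```python
# Pairs of (label, token_id) taken from the debug output above.
pairs = [(-100, 128256), (-100, 882), (4438, 4438), (649, 649), (128257, 128257)]

# Tokens with label -100 are masked; trained tokens have label == token_id.
trained = [tok for lab, tok in pairs if lab != -100]
masked  = [tok for lab, tok in pairs if lab == -100]

assert all(lab == tok for lab, tok in pairs if lab != -100)
print(len(trained), len(masked))  # 3 2
```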

+
+
+
+

Instruction Dataset

+

Instruction datasets are used to train instruction-following models and comprise a prompt, containing an instruction, and a single response. In contrast to chat datasets which may be multi-turn, instruct datasets are typically single-turn.

+

An example of a common format is Alpaca:

+
{"instruction": "...", "input": "...", "output": "..."}
+

Using those keys, a prompt can be built:

+
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{instruction}
+
+### Input:
+{input}
+
+### Response:
+{output}
+

This can be configured as such:

+
datasets:
+  - path: A.jsonl
+    type: alpaca
+
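The Alpaca prompt construction above can be sketched in Python; the row values below are hypothetical:

```python
# Build the Alpaca prompt shown above from one dataset row.
ALPACA = (
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. Write a response that appropriately "
    "completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n"
    "### Input:\n{input}\n\n"
    "### Response:\n{output}"
)

row = {"instruction": "Add the numbers.", "input": "3 and 5", "output": "8"}
prompt = ALPACA.format(**row)
print(prompt)
```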

Axolotl supports many kinds of instruction datasets. All of them can be found here (https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/inst_tune.html) with their respective type and sample row format.

+
+

Custom Instruct Prompt Format

+

Due to the myriad possibilities of instruction formats, Axolotl allows customizing your own instruction format without having to dive into the code directly.

+

In the example below, a sample row is formatted into the mistral_v1 format.

+
{"input": "...", "output": "..."}
+
datasets:
+  - path: repo
+    type:
+      system_prompt: ""
+
+      field_system:
+      field_instruction: input
+      field_input:
+      field_output: output
+
+      # multi-line example with input
+      format: |-
+        [INST] {instruction} {input} [/INST]
+
+      # single-line example without input
+      no_input_format: "[INST] {instruction} [/INST]"
+

The config specifies that field_instruction is actually named input, and field_input is empty as there is no input in this sample. Generally, instruction can be thought of as the question to the model, input as additional context, and output as the response. Neither an input nor a system prompt is required. In the end, the most important part is to understand what format you want and how to customize it to your use case.

+
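As a sketch of how format and no_input_format would be applied, mirroring the config above; build_prompt is a hypothetical helper for illustration, not Axolotl's implementation:

```python
# format is used when the row has an input; no_input_format otherwise.
FMT = "[INST] {instruction} {input} [/INST]"
NO_INPUT_FMT = "[INST] {instruction} [/INST]"

def build_prompt(row, instruction_field="input", input_field=None):
    instruction = row[instruction_field]
    extra = row.get(input_field) if input_field else None
    if extra:
        return FMT.format(instruction=instruction, input=extra)
    return NO_INPUT_FMT.format(instruction=instruction)

# Matches the sample row above: field_instruction is the "input" key,
# and field_input is unset.
print(build_prompt({"input": "Add 3+5", "output": "8"}))  # [INST] Add 3+5 [/INST]
```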
+
+
+
+

Reinforcement Learning from Human Feedback (RLHF)

+

There are multiple RLHF methods, each with its own dataset requirements. Please see the RLHF datasets documentation for more detail.

+ + +
+ +