Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2024-04-27 16:08:00 +00:00
parent 8574fa264b
commit 17f1e93c5f
5 changed files with 30 additions and 30 deletions

View File

@@ -1 +1 @@
86a4e5fc
5c3c9e0d

View File

@@ -351,7 +351,7 @@ Description
</tr>
</thead>
<tbody class="list">
<tr data-index="0" data-listing-file-modified-sort="1713816016937" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<tr data-index="0" data-listing-file-modified-sort="1714234039300" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<td>
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
</td>
@@ -359,7 +359,7 @@ Description
<span class="listing-description">Data format for a pre-training completion task.</span>
</td>
</tr>
<tr data-index="1" data-listing-file-modified-sort="1713816016937" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<tr data-index="1" data-listing-file-modified-sort="1714234039300" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<td>
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
</td>
@@ -367,7 +367,7 @@ Description
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="2" data-listing-file-modified-sort="1713816016937" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<tr data-index="2" data-listing-file-modified-sort="1714234039300" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<td>
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
</td>
@@ -375,7 +375,7 @@ Description
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="3" data-listing-file-modified-sort="1713816016937" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<tr data-index="3" data-listing-file-modified-sort="1714234039300" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<td>
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
</td>
@@ -383,7 +383,7 @@ Description
<span class="listing-description">Construct prompts without a template.</span>
</td>
</tr>
<tr data-index="4" data-listing-file-modified-sort="1713816016937" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<tr data-index="4" data-listing-file-modified-sort="1714234039300" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<td>
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
</td>

View File

@@ -353,7 +353,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="fu">chat_template</span><span class="kw">:</span><span class="at"> chatml</span></span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned</span></span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> orpo.chat_template</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.argilla</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="using-local-dataset-files" class="level4">
<h4 class="anchored" data-anchor-id="using-local-dataset-files">Using local dataset files</h4>

View File

@@ -150,7 +150,7 @@
"href": "docs/rlhf.html",
"title": "RLHF (Beta)",
"section": "",
"text": "Overview\nReinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback. Various methods include, but not limited to:\n\nProximal Policy Optimization (PPO) (not yet supported in axolotl)\nDirect Preference Optimization (DPO)\nIdentity Preference Optimization (IPO)\n\n\n\nRLHF using Axolotl\n\n[!IMPORTANT] This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.\n\nThe various RL training methods are implemented in trl and wrapped via axolotl. Below are various examples with how you can use various preference datasets to train models that use ChatML\n\nDPO\nrl: dpo\ndatasets:\n - path: Intel/orca_dpo_pairs\n split: train\n type: chatml.intel\n - path: argilla/ultrafeedback-binarized-preferences\n split: train\n type: chatml.argilla\n\n\nIPO\nrl: ipo\n\n\nORPO\nPaper: https://arxiv.org/abs/2403.07691\nrl: orpo\norpo_alpha: 0.1\nremove_unused_columns: false\n\nchat_template: chatml\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned\n type: orpo.chat_template\n\n\nUsing local dataset files\ndatasets:\n - ds_type: json\n data_files:\n - orca_rlhf.jsonl\n split: train\n type: chatml.intel\n\n\nTrl autounwrap for peft\nTrl supports autounwrapping peft models, so that a ref model does not need to be additionally loaded, leading to less VRAM needed. This is on by default. To turn it off, pass the following config.\n# load ref model when adapter training.\nrl_adapter_ref_model: true",
"text": "Overview\nReinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback. Various methods include, but not limited to:\n\nProximal Policy Optimization (PPO) (not yet supported in axolotl)\nDirect Preference Optimization (DPO)\nIdentity Preference Optimization (IPO)\n\n\n\nRLHF using Axolotl\n\n[!IMPORTANT] This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.\n\nThe various RL training methods are implemented in trl and wrapped via axolotl. Below are various examples with how you can use various preference datasets to train models that use ChatML\n\nDPO\nrl: dpo\ndatasets:\n - path: Intel/orca_dpo_pairs\n split: train\n type: chatml.intel\n - path: argilla/ultrafeedback-binarized-preferences\n split: train\n type: chatml.argilla\n\n\nIPO\nrl: ipo\n\n\nORPO\nPaper: https://arxiv.org/abs/2403.07691\nrl: orpo\norpo_alpha: 0.1\nremove_unused_columns: false\n\nchat_template: chatml\ndatasets:\n - path: argilla/ultrafeedback-binarized-preferences-cleaned\n type: chat_template.argilla\n\n\nUsing local dataset files\ndatasets:\n - ds_type: json\n data_files:\n - orca_rlhf.jsonl\n split: train\n type: chatml.intel\n\n\nTrl autounwrap for peft\nTrl supports autounwrapping peft models, so that a ref model does not need to be additionally loaded, leading to less VRAM needed. This is on by default. To turn it off, pass the following config.\n# load ref model when adapter training.\nrl_adapter_ref_model: true",
"crumbs": [
"How-To Guides",
"RLHF (Beta)"

View File

@@ -2,90 +2,90 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/index.html</loc>
<lastmod>2024-04-22T20:00:16.953Z</lastmod>
<lastmod>2024-04-27T16:07:19.312Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/TODO.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.296Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multi-node.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/rlhf.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/nccl.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multipack.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/tokenized.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/inst_tune.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/conversation.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/batch_vs_grad.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/input_output.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/faq.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset_preprocessing.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/template_free.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/pretraining.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/index.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/mac.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/config.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/debugging.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/fsdp_qlora.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html</loc>
<lastmod>2024-04-22T20:00:16.941Z</lastmod>
<lastmod>2024-04-27T16:07:19.300Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/FAQS.html</loc>
<lastmod>2024-04-22T20:00:16.937Z</lastmod>
<lastmod>2024-04-27T16:07:19.296Z</lastmod>
</url>
</urlset>