Files
axolotl/docs/rlhf.html
Quarto GHA Workflow Runner 754817c8c6 Built site for gh-pages
2025-03-07 13:59:52 +00:00

1415 lines
115 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.6.42">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="description" content="Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback.">
<title>RLHF (Beta) Axolotl</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../site_libs/clipboard/clipboard.min.js"></script>
<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../site_libs/quarto-search/fuse.min.js"></script>
<script src="../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../">
<link href="../favicon.jpg" rel="icon" type="image/jpeg">
<script src="../site_libs/quarto-html/quarto.js"></script>
<script src="../site_libs/quarto-html/popper.min.js"></script>
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../site_libs/quarto-html/anchor.min.js"></script>
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b53751a350365c71b6c909e95f209ed1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../site_libs/bootstrap/bootstrap-71f806479865a0e7fd52beea5cc266f1.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<script id="quarto-search-options" type="application/json">{
"location": "navbar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "end",
"type": "overlay",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<link rel="stylesheet" href="../styles.css">
</head>
<body class="nav-sidebar docked nav-fixed">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<nav class="navbar navbar-expand " data-bs-theme="dark">
<div class="navbar-container container-fluid">
<div class="navbar-brand-container mx-auto">
<a href="../index.html" class="navbar-brand navbar-brand-logo">
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
</a>
</div>
<div class="quarto-navbar-tools tools-wide tools-end">
<a href="https://twitter.com/axolotl_ai" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-twitter"></i></a>
<a href="https://github.com/axolotl-ai-cloud/axolotl/" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-github"></i></a>
<a href="https://discord.gg/7m9sfhzaf3" title="" class="quarto-navigation-tool px-1" aria-label=""><i class="bi bi-discord"></i></a>
</div>
<div id="quarto-search" class="" title="Search"></div>
</div> <!-- /container-fluid -->
</nav>
<nav class="quarto-secondary-nav">
<div class="container-fluid d-flex">
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Home</span></a>
</div>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
<span class="menu-text">Getting Started</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Quickstart</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Installation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/cli.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">CLI Reference</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Inference</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Formats</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Pre-training</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Instruction Tuning</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Conversation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/stepwise_supervised.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Stepwise Supervised Format</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Template-Free</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
<span class="menu-text">Deployments</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/docker.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Docker</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi-GPU</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multi Node</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Ray Train</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Mac M-series</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true">
<span class="menu-text">How To Guides</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multimodal.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link active">
<span class="menu-text">RLHF (Beta)</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Reward Modelling</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Learning Rate Groups</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">LoRA Optimizations</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true">
<span class="menu-text">Core Concepts</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Dataset Preprocessing</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Multipack (Sample Packing)</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
<span class="menu-text">Advanced Features</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FDSP + QLoRA</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Unsloth</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/torchao.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">PyTorch ao</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Custom Integrations</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
<span class="menu-text">Troubleshooting</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">FAQ</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Debugging</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">NCCL</span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true">
<span class="menu-text">Reference</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-8" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Config options</span></a>
</div>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active" data-toc-expanded="2">
<h2 id="toc-title">On this page</h2>
<ul>
<li><a href="#overview" id="toc-overview" class="nav-link active" data-scroll-target="#overview">Overview</a></li>
<li><a href="#rlhf-using-axolotl" id="toc-rlhf-using-axolotl" class="nav-link" data-scroll-target="#rlhf-using-axolotl">RLHF using Axolotl</a>
<ul>
<li><a href="#dpo" id="toc-dpo" class="nav-link" data-scroll-target="#dpo">DPO</a>
<ul class="collapse">
<li><a href="#chatml.argilla" id="toc-chatml.argilla" class="nav-link" data-scroll-target="#chatml.argilla">chatml.argilla</a></li>
<li><a href="#chatml.argilla_chat" id="toc-chatml.argilla_chat" class="nav-link" data-scroll-target="#chatml.argilla_chat">chatml.argilla_chat</a></li>
<li><a href="#chatml.icr" id="toc-chatml.icr" class="nav-link" data-scroll-target="#chatml.icr">chatml.icr</a></li>
<li><a href="#chatml.intel" id="toc-chatml.intel" class="nav-link" data-scroll-target="#chatml.intel">chatml.intel</a></li>
<li><a href="#chatml.prompt_pairs" id="toc-chatml.prompt_pairs" class="nav-link" data-scroll-target="#chatml.prompt_pairs">chatml.prompt_pairs</a></li>
<li><a href="#chatml.ultra" id="toc-chatml.ultra" class="nav-link" data-scroll-target="#chatml.ultra">chatml.ultra</a></li>
<li><a href="#llama3.argilla" id="toc-llama3.argilla" class="nav-link" data-scroll-target="#llama3.argilla">llama3.argilla</a></li>
<li><a href="#llama3.argilla_chat" id="toc-llama3.argilla_chat" class="nav-link" data-scroll-target="#llama3.argilla_chat">llama3.argilla_chat</a></li>
<li><a href="#llama3.icr" id="toc-llama3.icr" class="nav-link" data-scroll-target="#llama3.icr">llama3.icr</a></li>
<li><a href="#llama3.intel" id="toc-llama3.intel" class="nav-link" data-scroll-target="#llama3.intel">llama3.intel</a></li>
<li><a href="#llama3.prompt_pairs" id="toc-llama3.prompt_pairs" class="nav-link" data-scroll-target="#llama3.prompt_pairs">llama3.prompt_pairs</a></li>
<li><a href="#llama3.ultra" id="toc-llama3.ultra" class="nav-link" data-scroll-target="#llama3.ultra">llama3.ultra</a></li>
<li><a href="#zephyr.nectar" id="toc-zephyr.nectar" class="nav-link" data-scroll-target="#zephyr.nectar">zephyr.nectar</a></li>
<li><a href="#chat_template.default" id="toc-chat_template.default" class="nav-link" data-scroll-target="#chat_template.default">chat_template.default</a></li>
<li><a href="#user_defined.default" id="toc-user_defined.default" class="nav-link" data-scroll-target="#user_defined.default">user_defined.default</a></li>
</ul></li>
<li><a href="#ipo" id="toc-ipo" class="nav-link" data-scroll-target="#ipo">IPO</a></li>
<li><a href="#orpo" id="toc-orpo" class="nav-link" data-scroll-target="#orpo">ORPO</a>
<ul class="collapse">
<li><a href="#chat_template.argilla" id="toc-chat_template.argilla" class="nav-link" data-scroll-target="#chat_template.argilla">chat_template.argilla</a></li>
</ul></li>
<li><a href="#kto" id="toc-kto" class="nav-link" data-scroll-target="#kto">KTO</a>
<ul class="collapse">
<li><a href="#chatml.argilla-1" id="toc-chatml.argilla-1" class="nav-link" data-scroll-target="#chatml.argilla-1">chatml.argilla</a></li>
<li><a href="#chatml.argilla_chat-1" id="toc-chatml.argilla_chat-1" class="nav-link" data-scroll-target="#chatml.argilla_chat-1">chatml.argilla_chat</a></li>
<li><a href="#chatml.intel-1" id="toc-chatml.intel-1" class="nav-link" data-scroll-target="#chatml.intel-1">chatml.intel</a></li>
<li><a href="#chatml.prompt_pairs-1" id="toc-chatml.prompt_pairs-1" class="nav-link" data-scroll-target="#chatml.prompt_pairs-1">chatml.prompt_pairs</a></li>
<li><a href="#chatml.ultra-1" id="toc-chatml.ultra-1" class="nav-link" data-scroll-target="#chatml.ultra-1">chatml.ultra</a></li>
<li><a href="#llama3.argilla-1" id="toc-llama3.argilla-1" class="nav-link" data-scroll-target="#llama3.argilla-1">llama3.argilla</a></li>
<li><a href="#llama3.argilla_chat-1" id="toc-llama3.argilla_chat-1" class="nav-link" data-scroll-target="#llama3.argilla_chat-1">llama3.argilla_chat</a></li>
<li><a href="#llama3.intel-1" id="toc-llama3.intel-1" class="nav-link" data-scroll-target="#llama3.intel-1">llama3.intel</a></li>
<li><a href="#llama3.prompt_pairs-1" id="toc-llama3.prompt_pairs-1" class="nav-link" data-scroll-target="#llama3.prompt_pairs-1">llama3.prompt_pairs</a></li>
<li><a href="#llama3.ultra-1" id="toc-llama3.ultra-1" class="nav-link" data-scroll-target="#llama3.ultra-1">llama3.ultra</a></li>
<li><a href="#user_defined.default-1" id="toc-user_defined.default-1" class="nav-link" data-scroll-target="#user_defined.default-1">user_defined.default</a></li>
</ul></li>
<li><a href="#grpo" id="toc-grpo" class="nav-link" data-scroll-target="#grpo">GRPO</a></li>
<li><a href="#using-local-dataset-files" id="toc-using-local-dataset-files" class="nav-link" data-scroll-target="#using-local-dataset-files">Using local dataset files</a></li>
<li><a href="#trl-auto-unwrapping-for-peft" id="toc-trl-auto-unwrapping-for-peft" class="nav-link" data-scroll-target="#trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</a></li>
</ul></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
<div class="quarto-title">
<h1 class="title">RLHF (Beta)</h1>
</div>
<div>
<div class="description">
Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback.
</div>
</div>
<div class="quarto-title-meta">
</div>
</header>
<section id="overview" class="level2">
<h2 class="anchored" data-anchor-id="overview">Overview</h2>
<p>Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback. Various methods include, but not limited to:</p>
<ul>
<li><a href="#dpo">Direct Preference Optimization (DPO)</a></li>
<li><a href="#ipo">Identity Preference Optimization (IPO)</a></li>
<li><a href="#kto">Kahneman-Tversky Optimization (KTO)</a></li>
<li><a href="#orpo">Odds Ratio Preference Optimization (ORPO)</a></li>
<li>Proximal Policy Optimization (PPO) (not yet supported in axolotl)</li>
</ul>
</section>
<section id="rlhf-using-axolotl" class="level2">
<h2 class="anchored" data-anchor-id="rlhf-using-axolotl">RLHF using Axolotl</h2>
<div class="callout callout-style-default callout-important callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Important
</div>
</div>
<div class="callout-body-container callout-body">
<p>This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality.</p>
</div>
</div>
<p>We rely on the <a href="https://github.com/huggingface/trl">TRL</a> library for implementations of various RL training methods, which we wrap around to expose in axolotl. Each method has their own supported ways of loading datasets and prompt formats.</p>
<div class="callout callout-style-default callout-tip callout-titled">
<div class="callout-header d-flex align-content-center">
<div class="callout-icon-container">
<i class="callout-icon"></i>
</div>
<div class="callout-title-container flex-fill">
Tip
</div>
</div>
<div class="callout-body-container callout-body">
<p>You can find what each method supports by going into <code>src/axolotl/prompt_strategies/{method}</code> where <code>{method}</code> is one of our supported methods. The <code>type:</code> can be retrieved from <code>{method}.{function_name}</code>.</p>
</div>
</div>
<section id="dpo" class="level3">
<h3 class="anchored" data-anchor-id="dpo">DPO</h3>
<p>Example config:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> Intel/orca_dpo_pairs</span></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>DPO supports the following types with the following dataset format:</p>
<section id="chatml.argilla" class="level4">
<h4 class="anchored" data-anchor-id="chatml.argilla">chatml.argilla</h4>
<div class="sourceCode" id="cb2"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen_response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected_response"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.argilla_chat" class="level4">
<h4 class="anchored" data-anchor-id="chatml.argilla_chat">chatml.argilla_chat</h4>
<div class="sourceCode" id="cb3"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.icr" class="level4">
<h4 class="anchored" data-anchor-id="chatml.icr">chatml.icr</h4>
<div class="sourceCode" id="cb4"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.intel" class="level4">
<h4 class="anchored" data-anchor-id="chatml.intel">chatml.intel</h4>
<div class="sourceCode" id="cb5"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.prompt_pairs" class="level4">
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs">chatml.prompt_pairs</h4>
<div class="sourceCode" id="cb6"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.ultra" class="level4">
<h4 class="anchored" data-anchor-id="chatml.ultra">chatml.ultra</h4>
<div class="sourceCode" id="cb7"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.argilla" class="level4">
<h4 class="anchored" data-anchor-id="llama3.argilla">llama3.argilla</h4>
<div class="sourceCode" id="cb8"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen_response"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected_response"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.argilla_chat" class="level4">
<h4 class="anchored" data-anchor-id="llama3.argilla_chat">llama3.argilla_chat</h4>
<div class="sourceCode" id="cb9"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.icr" class="level4">
<h4 class="anchored" data-anchor-id="llama3.icr">llama3.icr</h4>
<div class="sourceCode" id="cb10"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.intel" class="level4">
<h4 class="anchored" data-anchor-id="llama3.intel">llama3.intel</h4>
<div class="sourceCode" id="cb11"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.prompt_pairs" class="level4">
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs">llama3.prompt_pairs</h4>
<div class="sourceCode" id="cb12"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.ultra" class="level4">
<h4 class="anchored" data-anchor-id="llama3.ultra">llama3.ultra</h4>
<div class="sourceCode" id="cb13"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb13-8"><a href="#cb13-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb13-9"><a href="#cb13-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb13-10"><a href="#cb13-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="zephyr.nectar" class="level4">
<h4 class="anchored" data-anchor-id="zephyr.nectar">zephyr.nectar</h4>
<div class="sourceCode" id="cb14"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"answers"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rank"</span><span class="fu">:</span> <span class="dv">1</span></span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">"answer"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rank"</span><span class="fu">:</span> <span class="dv">2</span></span>
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span></span>
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">...</span> <span class="er">more</span> <span class="er">answers</span> <span class="er">with</span> <span class="er">ranks</span></span>
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chat_template.default" class="level4">
<h4 class="anchored" data-anchor-id="chat_template.default">chat_template.default</h4>
<div class="sourceCode" id="cb15"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.default</span></span>
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_messages</span><span class="kw">:</span><span class="at"> </span><span class="st">"messages"</span></span>
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">message_property_mappings</span><span class="kw">:</span></span>
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">role</span><span class="kw">:</span><span class="at"> role</span></span>
<span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">content</span><span class="kw">:</span><span class="at"> content</span></span>
<span id="cb15-12"><a href="#cb15-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">roles</span><span class="kw">:</span></span>
<span id="cb15-13"><a href="#cb15-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">user</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"user"</span><span class="kw">]</span></span>
<span id="cb15-14"><a href="#cb15-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">assistant</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"assistant"</span><span class="kw">]</span></span>
<span id="cb15-15"><a href="#cb15-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">system</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"system"</span><span class="kw">]</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Sample input format:</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"messages"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"system"</span><span class="fu">,</span></span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span></span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span></span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span><span class="ot">,</span></span>
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">...</span> <span class="er">more</span> <span class="er">messages</span></span>
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="fu">{</span></span>
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">},</span></span>
<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="fu">{</span></span>
<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a> <span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span></span>
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb16-20"><a href="#cb16-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span></span>
<span id="cb16-21"><a href="#cb16-21" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="user_defined.default" class="level4">
<h4 class="anchored" data-anchor-id="user_defined.default">user_defined.default</h4>
<p>For custom behaviors,</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> user_defined.default</span></span>
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_chosen</span><span class="kw">:</span><span class="at"> </span><span class="st">"chosen"</span></span>
<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_rejected</span><span class="kw">:</span><span class="at"> </span><span class="st">"rejected"</span></span>
<span id="cb17-11"><a href="#cb17-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
<span id="cb17-12"><a href="#cb17-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">chosen_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{chosen}"</span></span>
<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">rejected_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{rejected}"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The input format is a simple JSON input with customizable fields based on the above config.</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
</section>
<section id="ipo" class="level3">
<h3 class="anchored" data-anchor-id="ipo">IPO</h3>
<p>As IPO is just DPO with a different loss function, all supported options for DPO works here.</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ipo</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="orpo" class="level3">
<h3 class="anchored" data-anchor-id="orpo">ORPO</h3>
<p>Paper: https://arxiv.org/abs/2403.07691</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> orpo</span></span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="fu">orpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="fu">chat_template</span><span class="kw">:</span><span class="at"> chatml</span></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned</span></span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.argilla</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>ORPO supports the following types with the following dataset format:</p>
<section id="chat_template.argilla" class="level4">
<h4 class="anchored" data-anchor-id="chat_template.argilla">chat_template.argilla</h4>
<div class="sourceCode" id="cb21"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">if</span> <span class="er">available</span><span class="fu">,</span> <span class="er">will</span> <span class="er">be</span> <span class="er">taken</span> <span class="er">as</span> <span class="er">user</span> <span class="er">message</span> <span class="er">for</span> <span class="er">single-turn</span> <span class="er">instead</span> <span class="er">of</span> <span class="er">from</span> <span class="er">list</span> <span class="er">below</span></span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a> <span class="er">//</span> <span class="er">chosen/rejected</span> <span class="er">should</span> <span class="er">be</span> <span class="er">same</span> <span class="er">till</span> <span class="er">last</span> <span class="er">content</span> <span class="er">and</span> <span class="er">only</span> <span class="er">even-number</span> <span class="er">of</span> <span class="er">alternating</span> <span class="er">user/assistant</span> <span class="er">turns</span></span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
</section>
<section id="kto" class="level3">
<h3 class="anchored" data-anchor-id="kto">KTO</h3>
<div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span></span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_desirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.2</span></span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a><span class="fu">remove_unused_columns</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned-kto</span></span>
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> llama3.ultra</span></span>
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing_kwargs</span><span class="kw">:</span></span>
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_reentrant</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>KTO supports the following types with the following dataset format:</p>
<section id="chatml.argilla-1" class="level4">
<h4 class="anchored" data-anchor-id="chatml.argilla-1">chatml.argilla</h4>
<div class="sourceCode" id="cb23"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.argilla_chat-1" class="level4">
<h4 class="anchored" data-anchor-id="chatml.argilla_chat-1">chatml.argilla_chat</h4>
<div class="sourceCode" id="cb24"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span><span class="fu">,</span></span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb24-6"><a href="#cb24-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb24-7"><a href="#cb24-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb24-8"><a href="#cb24-8" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.intel-1" class="level4">
<h4 class="anchored" data-anchor-id="chatml.intel-1">chatml.intel</h4>
<div class="sourceCode" id="cb25"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.prompt_pairs-1" class="level4">
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs-1">chatml.prompt_pairs</h4>
<div class="sourceCode" id="cb26"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="chatml.ultra-1" class="level4">
<h4 class="anchored" data-anchor-id="chatml.ultra-1">chatml.ultra</h4>
<div class="sourceCode" id="cb27"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.argilla-1" class="level4">
<h4 class="anchored" data-anchor-id="llama3.argilla-1">llama3.argilla</h4>
<div class="sourceCode" id="cb28"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.argilla_chat-1" class="level4">
<h4 class="anchored" data-anchor-id="llama3.argilla_chat-1">llama3.argilla_chat</h4>
<div class="sourceCode" id="cb29"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"assistant"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.intel-1" class="level4">
<h4 class="anchored" data-anchor-id="llama3.intel-1">llama3.intel</h4>
<div class="sourceCode" id="cb30"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.prompt_pairs-1" class="level4">
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs-1">llama3.prompt_pairs</h4>
<div class="sourceCode" id="cb31"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="llama3.ultra-1" class="level4">
<h4 class="anchored" data-anchor-id="llama3.ultra-1">llama3.ultra</h4>
<div class="sourceCode" id="cb32"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="user_defined.default-1" class="level4">
<h4 class="anchored" data-anchor-id="user_defined.default-1">user_defined.default</h4>
<p>For custom behaviors,</p>
<div class="sourceCode" id="cb33"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb33-5"><a href="#cb33-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> user_defined.default</span></span>
<span id="cb33-6"><a href="#cb33-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb33-7"><a href="#cb33-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_prompt</span><span class="kw">:</span><span class="at"> </span><span class="st">"prompt"</span></span>
<span id="cb33-8"><a href="#cb33-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_system</span><span class="kw">:</span><span class="at"> </span><span class="st">"system"</span></span>
<span id="cb33-9"><a href="#cb33-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_completion</span><span class="kw">:</span><span class="at"> </span><span class="st">"completion"</span></span>
<span id="cb33-10"><a href="#cb33-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_label</span><span class="kw">:</span><span class="at"> </span><span class="st">"label"</span></span>
<span id="cb33-11"><a href="#cb33-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">prompt_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{prompt}"</span></span>
<span id="cb33-12"><a href="#cb33-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">completion_format</span><span class="kw">:</span><span class="at"> </span><span class="st">"{completion}"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The input format is a simple JSON input with customizable fields based on the above config.</p>
<div class="sourceCode" id="cb34"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
<span id="cb34-5"><a href="#cb34-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"label"</span><span class="fu">:</span> <span class="st">"..."</span></span>
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
</section>
<section id="grpo" class="level3">
<h3 class="anchored" data-anchor-id="grpo">GRPO</h3>
<p>GRPO uses custom reward functions and transformations. Please have them ready locally.</p>
<p>For ex, to load OpenAIs GSM8K and use a random reward for completions:</p>
<div class="sourceCode" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="co"># rewards.py</span></span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> random</span>
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> rand_reward_func(completions, <span class="op">**</span>kwargs) <span class="op">-&gt;</span> <span class="bu">list</span>[<span class="bu">float</span>]:</span>
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> [random.uniform(<span class="dv">0</span>, <span class="dv">1</span>) <span class="cf">for</span> _ <span class="kw">in</span> completions]</span>
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> oai_gsm8k_transform(cfg, <span class="op">*</span>args, <span class="op">**</span>kwargs):</span>
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a> <span class="kw">def</span> transform_fn(example, tokenizer<span class="op">=</span><span class="va">None</span>):</span>
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a> label <span class="op">=</span> example[<span class="st">"answer"</span>].split(<span class="st">"####"</span>)[<span class="op">-</span><span class="dv">1</span>].strip().replace(<span class="st">","</span>, <span class="st">""</span>)</span>
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {</span>
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"prompt"</span>: [{<span class="st">"role"</span>: <span class="st">"user"</span>, <span class="st">"content"</span>: example[<span class="st">"question"</span>]},],</span>
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"answer"</span>: label,</span>
<span id="cb35-13"><a href="#cb35-13" aria-hidden="true" tabindex="-1"></a> }</span>
<span id="cb35-14"><a href="#cb35-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> transform_fn, {<span class="st">"remove_columns"</span>: [<span class="st">"question"</span>]}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode" id="cb36"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.001</span></span>
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">256</span></span>
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">True</span></span>
<span id="cb36-7"><a href="#cb36-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_device</span><span class="kw">:</span><span class="at"> auto</span></span>
<span id="cb36-8"><a href="#cb36-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_gpu_memory_utilization</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.15</span></span>
<span id="cb36-9"><a href="#cb36-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generations</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
<span id="cb36-10"><a href="#cb36-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_funcs</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"rewards.rand_reward_func"</span><span class="kw">]</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span>
<span id="cb36-11"><a href="#cb36-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_weights</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">1.0</span><span class="kw">]</span></span>
<span id="cb36-12"><a href="#cb36-12" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb36-13"><a href="#cb36-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> openai/gsm8k</span></span>
<span id="cb36-14"><a href="#cb36-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">name</span><span class="kw">:</span><span class="at"> main</span></span>
<span id="cb36-15"><a href="#cb36-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> rewards.oai_gsm8k_transform</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>To see other examples of custom reward functions, please see <a href="https://github.com/huggingface/trl/blob/main/docs/source/grpo_trainer.md#using-a-custom-reward-function">TRL GRPO Docs</a>.</p>
<p>To see description of the configs, please see <a href="https://github.com/axolotl-ai-cloud/axolotl/blob/main/src/axolotl/utils/config/models/input/v0_4_1/trl.py">TRLConfig</a>.</p>
</section>
<section id="using-local-dataset-files" class="level3">
<h3 class="anchored" data-anchor-id="using-local-dataset-files">Using local dataset files</h3>
<div class="sourceCode" id="cb37"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
<span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span></span>
<span id="cb37-3"><a href="#cb37-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">data_files</span><span class="kw">:</span></span>
<span id="cb37-4"><a href="#cb37-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> orca_rlhf.jsonl</span></span>
<span id="cb37-5"><a href="#cb37-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
<span id="cb37-6"><a href="#cb37-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="trl-auto-unwrapping-for-peft" class="level3">
<h3 class="anchored" data-anchor-id="trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</h3>
<p>TRL supports auto-unwrapping PEFT models for RL training paradigms which rely on a reference model. This significantly reduces memory pressure as an additional refreference model does not need to be loaded, and reference model log-probabilities can be obtained by disabling PEFT adapters. This is enabled by default. To turn it off, pass the following config:</p>
<div class="sourceCode" id="cb38"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># load ref model when adapter training.</span></span>
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_adapter_ref_model</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const isCodeAnnotation = (el) => {
for (const clz of el.classList) {
if (clz.startsWith('code-annotation-')) {
return true;
}
}
return false;
}
const onCopySuccess = function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
}
const getTextToCopy = function(trigger) {
const codeEl = trigger.previousElementSibling.cloneNode(true);
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
}
}
return codeEl.innerText;
}
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
text: getTextToCopy
});
clipboard.on('success', onCopySuccess);
if (window.document.getElementById('quarto-embedded-source-code-modal')) {
const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
text: getTextToCopy,
container: window.document.getElementById('quarto-embedded-source-code-modal')
});
clipboardModal.on('success', onCopySuccess);
}
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
var mailtoRegex = new RegExp(/^mailto:/);
var filterRegex = new RegExp("https:\/\/axolotl-ai-cloud\.github\.io\/axolotl\/");
var isInternal = (href) => {
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (var i=0; i<links.length; i++) {
const link = links[i];
if (!isInternal(link.href)) {
// undo the damage that might have been done by quarto-nav.js in the case of
// links that we want to consider external
if (link.dataset.originalHref !== undefined) {
link.href = link.dataset.originalHref;
}
}
}
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
const config = {
allowHTML: true,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start',
};
if (contentFn) {
config.content = contentFn;
}
if (onTriggerFn) {
config.onTrigger = onTriggerFn;
}
if (onUntriggerFn) {
config.onUntrigger = onUntriggerFn;
}
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note) {
return note.innerHTML;
} else {
return "";
}
});
}
const xrefs = window.document.querySelectorAll('a.quarto-xref');
const processXRef = (id, note) => {
// Strip column container classes
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
});
}
let selectedAnnoteEl;
const selectorForAnnotation = ( cell, annotation) => {
let cellAttr = 'data-code-cell="' + cell + '"';
let lineAttr = 'data-code-annotation="' + annotation + '"';
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
return selector;
}
const selectCodeLines = (annoteEl) => {
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
parent = el.parentElement.parentElement;
if (lineIds.length > 1) {
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
div.style.left = 0;
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
const unselectCodeLines = () => {
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
elementsIds.forEach((elId) => {
const div = window.document.getElementById(elId);
if (div) {
div.remove();
}
});
selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
window.addEventListener(
"resize",
throttle(() => {
elRect = undefined;
if (selectedAnnoteEl) {
selectCodeLines(selectedAnnoteEl);
}
}, 10)
);
function throttle(fn, ms) {
let throttle = false;
let timer;
return (...args) => {
if(!throttle) { // first call gets through
fn.apply(this, args);
throttle = true;
} else { // all the others get throttled
if(timer) clearTimeout(timer); // cancel #2
timer = setTimeout(() => {
fn.apply(this, args);
timer = throttle = false;
}, ms);
}
};
}
// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
annoteDlNode.addEventListener('click', (event) => {
const clickedEl = event.target;
if (clickedEl !== selectedAnnoteEl) {
unselectCodeLines();
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
if (activeEl) {
activeEl.classList.remove('code-annotation-active');
}
selectCodeLines(clickedEl);
clickedEl.classList.add('code-annotation-active');
} else {
// Unselect the line
unselectCodeLines();
clickedEl.classList.remove('code-annotation-active');
}
});
}
const findCites = (el) => {
const parentEl = el.parentElement;
if (parentEl) {
const cites = parentEl.dataset.cites;
if (cites) {
return {
el,
cites: cites.split(' ')
};
} else {
return findCites(el.parentElement)
}
} else {
return undefined;
}
};
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
</div> <!-- /content -->
</body></html>