Built site for gh-pages
This commit is contained in:
468
docs/rlhf.html
468
docs/rlhf.html
@@ -70,10 +70,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-2f5df379a58b258e96c21c0638c20c03.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b53751a350365c71b6c909e95f209ed1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../site_libs/bootstrap/bootstrap-141b2cdb37a94fcfd6825c1581ff795f.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
|
||||
<link href="../site_libs/bootstrap/bootstrap-0cda210ced8960466d2ee7bf22d15016.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
@@ -113,8 +113,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
<a href="../index.html" class="navbar-brand navbar-brand-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
@@ -130,7 +130,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
@@ -151,7 +151,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<span class="menu-text">Getting Started</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -160,91 +160,25 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Getting Started with Axolotl</span></a>
|
||||
<span class="menu-text">Quickstart</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Installation Guide</span></a>
|
||||
<span class="menu-text">Installation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
<a href="../docs/cli.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">CLI Reference</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Inference Guide</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi-GPU Training Guide</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Unsloth</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Training with AMD GPUs on HPC Systems</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Ray Train integration</span></a>
|
||||
<span class="menu-text">Inference</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -299,7 +233,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<span class="menu-text">Deployments</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -307,18 +241,187 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi-GPU</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Ray Train</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">How To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multimodal.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Reward Modelling</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Learning Rate Groups</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">LoRA Optimizations</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Core Concepts</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Preprocessing</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Advanced Features</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Unsloth</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/torchao.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">PyTorch ao</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Integrations</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Troubleshooting</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-8" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
@@ -331,7 +434,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul>
|
||||
<li><a href="#overview" id="toc-overview" class="nav-link active" data-scroll-target="#overview">Overview</a></li>
|
||||
<li><a href="#rlhf-using-axolotl" id="toc-rlhf-using-axolotl" class="nav-link" data-scroll-target="#rlhf-using-axolotl">RLHF using Axolotl</a>
|
||||
<ul class="collapse">
|
||||
<ul>
|
||||
<li><a href="#dpo" id="toc-dpo" class="nav-link" data-scroll-target="#dpo">DPO</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#chatml.argilla" id="toc-chatml.argilla" class="nav-link" data-scroll-target="#chatml.argilla">chatml.argilla</a></li>
|
||||
@@ -369,6 +472,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li><a href="#llama3.ultra-1" id="toc-llama3.ultra-1" class="nav-link" data-scroll-target="#llama3.ultra-1">llama3.ultra</a></li>
|
||||
<li><a href="#user_defined.default-1" id="toc-user_defined.default-1" class="nav-link" data-scroll-target="#user_defined.default-1">user_defined.default</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#grpo" id="toc-grpo" class="nav-link" data-scroll-target="#grpo">GRPO</a></li>
|
||||
<li><a href="#using-local-dataset-files" id="toc-using-local-dataset-files" class="nav-link" data-scroll-target="#using-local-dataset-files">Using local dataset files</a></li>
|
||||
<li><a href="#trl-auto-unwrapping-for-peft" id="toc-trl-auto-unwrapping-for-peft" class="nav-link" data-scroll-target="#trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</a></li>
|
||||
</ul></li>
|
||||
@@ -378,7 +482,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/rlhf.html">RLHF (Beta)</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">RLHF (Beta)</h1>
|
||||
</div>
|
||||
@@ -402,19 +506,19 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
</header>
|
||||
|
||||
|
||||
<section id="overview" class="level1">
|
||||
<h1>Overview</h1>
|
||||
<section id="overview" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="overview">Overview</h2>
|
||||
<p>Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback. Various methods include, but not limited to:</p>
|
||||
<ul>
|
||||
<li>Proximal Policy Optimization (PPO) (not yet supported in axolotl)</li>
|
||||
<li><a href="#dpo">Direct Preference Optimization (DPO)</a></li>
|
||||
<li><a href="#ipo">Identity Preference Optimization (IPO)</a></li>
|
||||
<li><a href="#kto">Kahneman-Tversky Optimization (KTO)</a></li>
|
||||
<li><a href="#orpo">Odds Ratio Preference Optimization (ORPO)</a></li>
|
||||
<li>Proximal Policy Optimization (PPO) (not yet supported in axolotl)</li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="rlhf-using-axolotl" class="level1">
|
||||
<h1>RLHF using Axolotl</h1>
|
||||
<section id="rlhf-using-axolotl" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="rlhf-using-axolotl">RLHF using Axolotl</h2>
|
||||
<div class="callout callout-style-default callout-important callout-titled">
|
||||
<div class="callout-header d-flex align-content-center">
|
||||
<div class="callout-icon-container">
|
||||
@@ -442,8 +546,8 @@ Tip
|
||||
<p>You can find what each method supports by going into <code>src/axolotl/prompt_strategies/{method}</code> where <code>{method}</code> is one of our supported methods. The <code>type:</code> can be retrieved from <code>{method}.{function_name}</code>.</p>
|
||||
</div>
|
||||
</div>
|
||||
<section id="dpo" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="dpo">DPO</h2>
|
||||
<section id="dpo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="dpo">DPO</h3>
|
||||
<p>Example config:</p>
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
@@ -454,8 +558,8 @@ Tip
|
||||
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>DPO supports the following types with the following dataset format:</p>
|
||||
<section id="chatml.argilla" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.argilla">chatml.argilla</h3>
|
||||
<section id="chatml.argilla" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla">chatml.argilla</h4>
|
||||
<div class="sourceCode" id="cb2"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -463,8 +567,8 @@ Tip
|
||||
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected_response"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.argilla_chat" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.argilla_chat">chatml.argilla_chat</h3>
|
||||
<section id="chatml.argilla_chat" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla_chat">chatml.argilla_chat</h4>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
@@ -476,8 +580,8 @@ Tip
|
||||
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.icr" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.icr">chatml.icr</h3>
|
||||
<section id="chatml.icr" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.icr">chatml.icr</h4>
|
||||
<div class="sourceCode" id="cb4"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -485,8 +589,8 @@ Tip
|
||||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.intel" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.intel">chatml.intel</h3>
|
||||
<section id="chatml.intel" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.intel">chatml.intel</h4>
|
||||
<div class="sourceCode" id="cb5"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -494,8 +598,8 @@ Tip
|
||||
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.prompt_pairs" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.prompt_pairs">chatml.prompt_pairs</h3>
|
||||
<section id="chatml.prompt_pairs" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs">chatml.prompt_pairs</h4>
|
||||
<div class="sourceCode" id="cb6"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -503,8 +607,8 @@ Tip
|
||||
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.ultra" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.ultra">chatml.ultra</h3>
|
||||
<section id="chatml.ultra" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.ultra">chatml.ultra</h4>
|
||||
<div class="sourceCode" id="cb7"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -518,8 +622,8 @@ Tip
|
||||
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.argilla" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.argilla">llama3.argilla</h3>
|
||||
<section id="llama3.argilla" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla">llama3.argilla</h4>
|
||||
<div class="sourceCode" id="cb8"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -527,8 +631,8 @@ Tip
|
||||
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected_response"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.argilla_chat" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.argilla_chat">llama3.argilla_chat</h3>
|
||||
<section id="llama3.argilla_chat" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla_chat">llama3.argilla_chat</h4>
|
||||
<div class="sourceCode" id="cb9"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
@@ -540,8 +644,8 @@ Tip
|
||||
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.icr" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.icr">llama3.icr</h3>
|
||||
<section id="llama3.icr" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.icr">llama3.icr</h4>
|
||||
<div class="sourceCode" id="cb10"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -549,8 +653,8 @@ Tip
|
||||
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.intel" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.intel">llama3.intel</h3>
|
||||
<section id="llama3.intel" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.intel">llama3.intel</h4>
|
||||
<div class="sourceCode" id="cb11"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -558,8 +662,8 @@ Tip
|
||||
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.prompt_pairs" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.prompt_pairs">llama3.prompt_pairs</h3>
|
||||
<section id="llama3.prompt_pairs" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs">llama3.prompt_pairs</h4>
|
||||
<div class="sourceCode" id="cb12"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -567,8 +671,8 @@ Tip
|
||||
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"rejected"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.ultra" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.ultra">llama3.ultra</h3>
|
||||
<section id="llama3.ultra" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.ultra">llama3.ultra</h4>
|
||||
<div class="sourceCode" id="cb13"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
@@ -582,8 +686,8 @@ Tip
|
||||
<span id="cb13-11"><a href="#cb13-11" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb13-12"><a href="#cb13-12" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="zephyr.nectar" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="zephyr.nectar">zephyr.nectar</h3>
|
||||
<section id="zephyr.nectar" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="zephyr.nectar">zephyr.nectar</h4>
|
||||
<div class="sourceCode" id="cb14"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"answers"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
@@ -599,8 +703,8 @@ Tip
|
||||
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chat_template.default" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chat_template.default">chat_template.default</h3>
|
||||
<section id="chat_template.default" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chat_template.default">chat_template.default</h4>
|
||||
<div class="sourceCode" id="cb15"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
@@ -639,8 +743,8 @@ Tip
|
||||
<span id="cb16-20"><a href="#cb16-20" aria-hidden="true" tabindex="-1"></a> <span class="fu">}</span></span>
|
||||
<span id="cb16-21"><a href="#cb16-21" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="user_defined.default" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="user_defined.default">user_defined.default</h3>
|
||||
<section id="user_defined.default" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="user_defined.default">user_defined.default</h4>
|
||||
<p>For custom behaviors,</p>
|
||||
<div class="sourceCode" id="cb17"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> dpo</span></span>
|
||||
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
@@ -664,13 +768,13 @@ Tip
|
||||
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="ipo" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="ipo">IPO</h2>
|
||||
<section id="ipo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="ipo">IPO</h3>
|
||||
<p>As IPO is just DPO with a different loss function, all supported options for DPO works here.</p>
|
||||
<div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ipo</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="orpo" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="orpo">ORPO</h2>
|
||||
<section id="orpo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="orpo">ORPO</h3>
|
||||
<p>Paper: https://arxiv.org/abs/2403.07691</p>
|
||||
<div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> orpo</span></span>
|
||||
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="fu">orpo_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
@@ -681,8 +785,8 @@ Tip
|
||||
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> argilla/ultrafeedback-binarized-preferences-cleaned</span></span>
|
||||
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chat_template.argilla</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>ORPO supports the following types with the following dataset format:</p>
|
||||
<section id="chat_template.argilla" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chat_template.argilla">chat_template.argilla</h3>
|
||||
<section id="chat_template.argilla" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chat_template.argilla">chat_template.argilla</h4>
|
||||
<div class="sourceCode" id="cb21"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">if</span> <span class="er">available</span><span class="fu">,</span> <span class="er">will</span> <span class="er">be</span> <span class="er">taken</span> <span class="er">as</span> <span class="er">user</span> <span class="er">message</span> <span class="er">for</span> <span class="er">single-turn</span> <span class="er">instead</span> <span class="er">of</span> <span class="er">from</span> <span class="er">list</span> <span class="er">below</span></span>
|
||||
@@ -699,8 +803,8 @@ Tip
|
||||
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="kto" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="kto">KTO</h2>
|
||||
<section id="kto" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="kto">KTO</h3>
|
||||
<div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span></span>
|
||||
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="fu">kto_desirable_weight</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.2</span></span>
|
||||
@@ -716,16 +820,16 @@ Tip
|
||||
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing_kwargs</span><span class="kw">:</span></span>
|
||||
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_reentrant</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>KTO supports the following types with the following dataset format:</p>
|
||||
<section id="chatml.argilla-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.argilla-1">chatml.argilla</h3>
|
||||
<section id="chatml.argilla-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla-1">chatml.argilla</h4>
|
||||
<div class="sourceCode" id="cb23"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.argilla_chat-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.argilla_chat-1">chatml.argilla_chat</h3>
|
||||
<section id="chatml.argilla_chat-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.argilla_chat-1">chatml.argilla_chat</h4>
|
||||
<div class="sourceCode" id="cb24"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"chosen"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span></span>
|
||||
@@ -735,40 +839,40 @@ Tip
|
||||
<span id="cb24-7"><a href="#cb24-7" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb24-8"><a href="#cb24-8" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.intel-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.intel-1">chatml.intel</h3>
|
||||
<section id="chatml.intel-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.intel-1">chatml.intel</h4>
|
||||
<div class="sourceCode" id="cb25"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.prompt_pairs-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.prompt_pairs-1">chatml.prompt_pairs</h3>
|
||||
<section id="chatml.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.prompt_pairs-1">chatml.prompt_pairs</h4>
|
||||
<div class="sourceCode" id="cb26"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="chatml.ultra-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="chatml.ultra-1">chatml.ultra</h3>
|
||||
<section id="chatml.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="chatml.ultra-1">chatml.ultra</h4>
|
||||
<div class="sourceCode" id="cb27"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb27-2"><a href="#cb27-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb27-3"><a href="#cb27-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb27-4"><a href="#cb27-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb27-5"><a href="#cb27-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.argilla-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.argilla-1">llama3.argilla</h3>
|
||||
<section id="llama3.argilla-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla-1">llama3.argilla</h4>
|
||||
<div class="sourceCode" id="cb28"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"instruction"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.argilla_chat-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.argilla_chat-1">llama3.argilla_chat</h3>
|
||||
<section id="llama3.argilla_chat-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.argilla_chat-1">llama3.argilla_chat</h4>
|
||||
<div class="sourceCode" id="cb29"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="ot">[</span></span>
|
||||
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"role"</span><span class="fu">:</span> <span class="st">"user"</span><span class="fu">,</span> <span class="dt">"content"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">}</span><span class="ot">,</span></span>
|
||||
@@ -776,32 +880,32 @@ Tip
|
||||
<span id="cb29-5"><a href="#cb29-5" aria-hidden="true" tabindex="-1"></a> <span class="ot">]</span></span>
|
||||
<span id="cb29-6"><a href="#cb29-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.intel-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.intel-1">llama3.intel</h3>
|
||||
<section id="llama3.intel-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.intel-1">llama3.intel</h4>
|
||||
<div class="sourceCode" id="cb30"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"question"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.prompt_pairs-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.prompt_pairs-1">llama3.prompt_pairs</h3>
|
||||
<section id="llama3.prompt_pairs-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.prompt_pairs-1">llama3.prompt_pairs</h4>
|
||||
<div class="sourceCode" id="cb31"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb31-5"><a href="#cb31-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="llama3.ultra-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="llama3.ultra-1">llama3.ultra</h3>
|
||||
<section id="llama3.ultra-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="llama3.ultra-1">llama3.ultra</h4>
|
||||
<div class="sourceCode" id="cb32"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||||
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"system"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span> <span class="er">//</span> <span class="er">optional</span></span>
|
||||
<span id="cb32-3"><a href="#cb32-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"prompt"</span><span class="fu">:</span> <span class="st">"..."</span><span class="fu">,</span></span>
|
||||
<span id="cb32-4"><a href="#cb32-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"completion"</span><span class="fu">:</span> <span class="st">"..."</span></span>
|
||||
<span id="cb32-5"><a href="#cb32-5" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="user_defined.default-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="user_defined.default-1">user_defined.default</h3>
|
||||
<section id="user_defined.default-1" class="level4">
|
||||
<h4 class="anchored" data-anchor-id="user_defined.default-1">user_defined.default</h4>
|
||||
<p>For custom behaviors,</p>
|
||||
<div class="sourceCode" id="cb33"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> kto</span></span>
|
||||
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
@@ -824,20 +928,54 @@ Tip
|
||||
<span id="cb34-6"><a href="#cb34-6" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="using-local-dataset-files" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="using-local-dataset-files">Using local dataset files</h2>
|
||||
<div class="sourceCode" id="cb35"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span></span>
|
||||
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">data_files</span><span class="kw">:</span></span>
|
||||
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> orca_rlhf.jsonl</span></span>
|
||||
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<section id="grpo" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="grpo">GRPO</h3>
|
||||
<p>GRPO uses custom reward functions and transformations. Please have them ready locally.</p>
|
||||
<p>For ex, to load OpenAI’s GSM8K and use a random reward for completions:</p>
|
||||
<div class="sourceCode" id="cb35"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a><span class="co"># rewards.py</span></span>
|
||||
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> random</span>
|
||||
<span id="cb35-3"><a href="#cb35-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb35-4"><a href="#cb35-4" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> rand_reward_func(completions, <span class="op">**</span>kwargs) <span class="op">-></span> <span class="bu">list</span>[<span class="bu">float</span>]:</span>
|
||||
<span id="cb35-5"><a href="#cb35-5" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> [random.uniform(<span class="dv">0</span>, <span class="dv">1</span>) <span class="cf">for</span> _ <span class="kw">in</span> completions]</span>
|
||||
<span id="cb35-6"><a href="#cb35-6" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb35-7"><a href="#cb35-7" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> oai_gsm8k_transform(cfg, <span class="op">*</span>args, <span class="op">**</span>kwargs):</span>
|
||||
<span id="cb35-8"><a href="#cb35-8" aria-hidden="true" tabindex="-1"></a> <span class="kw">def</span> transform_fn(example, tokenizer<span class="op">=</span><span class="va">None</span>):</span>
|
||||
<span id="cb35-9"><a href="#cb35-9" aria-hidden="true" tabindex="-1"></a> label <span class="op">=</span> example[<span class="st">"answer"</span>].split(<span class="st">"####"</span>)[<span class="op">-</span><span class="dv">1</span>].strip().replace(<span class="st">","</span>, <span class="st">""</span>)</span>
|
||||
<span id="cb35-10"><a href="#cb35-10" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {</span>
|
||||
<span id="cb35-11"><a href="#cb35-11" aria-hidden="true" tabindex="-1"></a> <span class="st">"prompt"</span>: [{<span class="st">"role"</span>: <span class="st">"user"</span>, <span class="st">"content"</span>: example[<span class="st">"question"</span>]},],</span>
|
||||
<span id="cb35-12"><a href="#cb35-12" aria-hidden="true" tabindex="-1"></a> <span class="st">"answer"</span>: label,</span>
|
||||
<span id="cb35-13"><a href="#cb35-13" aria-hidden="true" tabindex="-1"></a> }</span>
|
||||
<span id="cb35-14"><a href="#cb35-14" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> transform_fn, {<span class="st">"remove_columns"</span>: [<span class="st">"question"</span>]}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb36"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||||
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||||
<span id="cb36-4"><a href="#cb36-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">beta</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.001</span></span>
|
||||
<span id="cb36-5"><a href="#cb36-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">256</span></span>
|
||||
<span id="cb36-6"><a href="#cb36-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">True</span></span>
|
||||
<span id="cb36-7"><a href="#cb36-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_device</span><span class="kw">:</span><span class="at"> auto</span></span>
|
||||
<span id="cb36-8"><a href="#cb36-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_gpu_memory_utilization</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.15</span></span>
|
||||
<span id="cb36-9"><a href="#cb36-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generations</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
|
||||
<span id="cb36-10"><a href="#cb36-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reward_funcs</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="st">"rewards.rand_reward_func"</span><span class="kw">]</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span>
|
||||
<span id="cb36-11"><a href="#cb36-11" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb36-12"><a href="#cb36-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> openai/gsm8k</span></span>
|
||||
<span id="cb36-13"><a href="#cb36-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">name</span><span class="kw">:</span><span class="at"> main</span></span>
|
||||
<span id="cb36-14"><a href="#cb36-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> rewards.oai_gsm8k_transform</span><span class="co"> # format: '{file_name}.{fn_name}'</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>To see other examples of custom reward functions, please see <a href="https://github.com/huggingface/trl/blob/main/docs/source/grpo_trainer.md#using-a-custom-reward-function">TRL GRPO Docs</a>.</p>
|
||||
</section>
|
||||
<section id="trl-auto-unwrapping-for-peft" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</h2>
|
||||
<section id="using-local-dataset-files" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="using-local-dataset-files">Using local dataset files</h3>
|
||||
<div class="sourceCode" id="cb37"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||||
<span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">ds_type</span><span class="kw">:</span><span class="at"> json</span></span>
|
||||
<span id="cb37-3"><a href="#cb37-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">data_files</span><span class="kw">:</span></span>
|
||||
<span id="cb37-4"><a href="#cb37-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> orca_rlhf.jsonl</span></span>
|
||||
<span id="cb37-5"><a href="#cb37-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train</span></span>
|
||||
<span id="cb37-6"><a href="#cb37-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> chatml.intel</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
</section>
|
||||
<section id="trl-auto-unwrapping-for-peft" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="trl-auto-unwrapping-for-peft">TRL auto-unwrapping for PEFT</h3>
|
||||
<p>TRL supports auto-unwrapping PEFT models for RL training paradigms which rely on a reference model. This significantly reduces memory pressure as an additional refreference model does not need to be loaded, and reference model log-probabilities can be obtained by disabling PEFT adapters. This is enabled by default. To turn it off, pass the following config:</p>
|
||||
<div class="sourceCode" id="cb36"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="co"># load ref model when adapter training.</span></span>
|
||||
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_adapter_ref_model</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb38"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="co"># load ref model when adapter training.</span></span>
|
||||
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl_adapter_ref_model</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
Reference in New Issue
Block a user