<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

<meta charset="utf-8">
<!-- Page metadata. Only whitespace may sit between these elements: any other
     text inside <head> makes the parser close <head> implicitly. -->
<meta name="generator" content="quarto-1.9.36">

<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

<meta name="description" content="Energy-Based Fine-Tuning uses feature-matching rewards from internal representations to train language models without external reward functions.">

<title>EBFT Training – Axolotl</title>
<style>
/* Default styles provided by pandoc.
** See https://pandoc.org/MANUAL.html#variables-for-html for config info.
*/
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
  width: 0.8em;
  margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
  vertical-align: middle;
}
/* CSS for syntax highlighting */
html { -webkit-text-size-adjust: 100%; }
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
/* Give empty source lines a height so blank lines are not collapsed. */
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
/* When printing, wrap long source lines with a hanging indent instead of scrolling. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbering for pre.numberSource: one CSS counter step per line span,
   rendered through the ::before of each line's first anchor. */
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
  }
pre.numberSource { margin-left: 3em; padding-left: 4px; }
/* Intentionally empty rule, kept exactly as emitted by the generator. */
div.sourceCode
  {   }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<!-- Site-wide Quarto/Bootstrap assets. Order and blocking behaviour are kept
     exactly as generated; only the stray text between the tags is removed. -->
<script src="../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../site_libs/clipboard/clipboard.min.js"></script>
<script src="../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../site_libs/quarto-search/fuse.min.js"></script>
<script src="../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../">
<link href="../favicon.jpg" rel="icon" type="image/jpeg">
<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="../site_libs/quarto-html/popper.min.js"></script>
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../site_libs/quarto-html/anchor.min.js"></script>
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-f418161beb48e0141c760e455f12af2c.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../site_libs/bootstrap/bootstrap-880650c6ad5b2af23899fb63005ac339.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
<!-- Search configuration as a JSON data block. The content must be strict JSON
     (no comments, no stray text) or it cannot be parsed by its consumer. -->
<script id="quarto-search-options" type="application/json">{
  "location": "navbar",
  "copy-button": false,
  "collapse-after": 3,
  "panel-placement": "end",
  "type": "overlay",
  "limit": 50,
  "keyboard-shortcut": [
    "f",
    "/",
    "s"
  ],
  "show-item-context": false,
  "language": {
    "search-no-results-text": "No results",
    "search-matching-documents-text": "matching documents",
    "search-copy-link-title": "Copy link to search",
    "search-hide-matches-text": "Hide additional matches",
    "search-more-match-text": "more match in this document",
    "search-more-matches-text": "more matches in this document",
    "search-clear-button-title": "Clear",
    "search-text-placeholder": "",
    "search-detached-cancel-button-title": "Cancel",
    "search-submit-button-title": "Submit",
    "search-label": "Search"
  }
}</script>
<!-- Google Analytics (gtag.js): the loader is fetched asynchronously, and the
     inline snippet queues the initial calls on window.dataLayer. -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-9KYCVJBNMQ"></script>

<script>
// Create the queue if it does not exist yet; gtag() pushes its raw
// `arguments` object onto it.
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
// 'anonymize_ip' is passed through to the config call unchanged.
gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
</script>

<!-- Project stylesheet, linked last in <head> so it follows the theme CSS in the cascade. -->
<link rel="stylesheet" href="../styles.css">
</head>

<body class="nav-sidebar docked nav-fixed quarto-light">

<!-- Empty container with id "quarto-search-results"; presumably filled in by the search script. -->
<div id="quarto-search-results"></div>
<!-- Fixed page header: primary navbar (brand, external links, search) followed
     by the secondary bar (sidebar toggle and breadcrumbs). -->
<header id="quarto-header" class="headroom fixed-top">
  <nav class="navbar navbar-expand " data-bs-theme="dark" aria-label="Main">
    <div class="navbar-container container-fluid">
      <div class="navbar-brand-container mx-auto">
        <!-- Both logo images are decorative (alt=""), so the link itself
             carries the accessible name. -->
        <a href="../index.html" class="navbar-brand navbar-brand-logo" aria-label="Axolotl home">
          <img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo light-content">
          <img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo dark-content">
        </a>
      </div>
      <!-- Icon-only links: each needs a non-empty accessible name; the icon
           glyphs are hidden from assistive technology. -->
      <div class="quarto-navbar-tools tools-wide tools-end">
        <a href="https://twitter.com/axolotl_ai" title="Twitter" class="quarto-navigation-tool px-1" aria-label="Twitter"><i class="bi bi-twitter" aria-hidden="true"></i></a>
        <a href="https://github.com/axolotl-ai-cloud/axolotl/" title="GitHub" class="quarto-navigation-tool px-1" aria-label="GitHub"><i class="bi bi-github" aria-hidden="true"></i></a>
        <a href="https://discord.gg/7m9sfhzaf3" title="Discord" class="quarto-navigation-tool px-1" aria-label="Discord"><i class="bi bi-discord" aria-hidden="true"></i></a>
      </div>
      <div id="quarto-search" class="" title="Search"></div>
    </div> <!-- /container-fluid -->
  </nav>
  <nav class="quarto-secondary-nav">
    <div class="container-fluid d-flex">
      <!-- Sidebar toggle. A native <button> needs no role="button"; the inline
           handler is kept as generated. -->
      <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
        <i class="bi bi-layout-text-sidebar-reverse" aria-hidden="true"></i>
      </button>
      <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/ebft.html" aria-current="page">EBFT Training</a></li></ol></nav>
      <!-- Pointer-only click area that repeats the toggle above. It has no href,
           so it is not focusable; it is hidden from assistive technology rather
           than exposed as a second, mis-typed "navigation" landmark. -->
      <a class="flex-grow-1" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-hidden="true" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
      </a>
    </div>
  </nav>
</header>
<!-- content -->
|
||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||
<!-- sidebar -->
|
||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||
<div class="sidebar-menu-container">
|
||
<ul class="list-unstyled mt-1">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../index.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Home</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Getting Started</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-1" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Quickstart</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/choosing_method.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Which Fine-Tuning Method Should I Use?</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Installation</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Inference and Merging</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="false">
|
||
<span class="menu-text">Model Guides</span></a>
|
||
<a class="sidebar-item-toggle text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="false" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth2 ">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/kimi-linear.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Kimi Linear</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/plano.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Plano Orchestrator</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/mimo.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">MiMo</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/internvl3_5.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">InternVL 3.5</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/olmo3.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">OLMo 3</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/trinity.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Trinity</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/arcee.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Arcee AFM</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="false">
|
||
<span class="menu-text">Ministral3</span></a>
|
||
<a class="sidebar-item-toggle text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="false" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth3 ">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/ministral3.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ministral3</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/ministral3/think.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ministral 3 Thinking</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/ministral3/vision.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ministral 3 Vision</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="false">
|
||
<span class="menu-text">Magistral</span></a>
|
||
<a class="sidebar-item-toggle text-start collapsed" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="false" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth3 ">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/magistral.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Magistral</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/magistral/think.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Magistral Thinking</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/magistral/vision.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Magistral Vision</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/ministral.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ministral</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/mistral-small.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Mistral Small 3.1/3.2</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/voxtral.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Voxtral</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/devstral.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Devstral</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/mistral.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Mistral 7B</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/llama-4.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Llama 4</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/llama-2.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Llama 2</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/qwen3-next.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Qwen 3 Next</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/qwen3.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Qwen 3</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/gemma3n.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Gemma 3n</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/apertus.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Apertus</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/gpt-oss.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">GPT-OSS</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/seed-oss.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Seed-OSS</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/phi.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Phi</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/smolvlm2.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">SmolVLM 2</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/granite4.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Granite 4</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/LiquidAI.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Liquid Foundation Models 2</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/hunyuan.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Hunyuan</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/jamba.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Jamba</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/models/orpheus.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Orpheus</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/cli.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Command Line Interface (CLI)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/telemetry.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Telemetry</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/config-reference.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Config Reference</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/api" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">API Reference</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/index.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Dataset Formats</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/pretraining.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Pre-training</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/inst_tune.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Instruction Tuning</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/conversation.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Conversation</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/stepwise_supervised.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Stepwise Supervised Format</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/template_free.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Template-Free</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset-formats/tokenized.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Custom Pre-Tokenized Dataset</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Deployments</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/docker.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Docker</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multi-GPU</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multi Node</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Ray Train</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Mac M-series</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">How To Guides</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multimodal.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">RLHF (Beta)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/grpo.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">GRPO Training</span></a>
|
||
</div>
|
||
</li>
|
||
<!-- Sidebar entry for this page (EBFT Training). aria-current exposes the
     "active" state to assistive technology, not only through styling. -->
<li class="sidebar-item">
  <div class="sidebar-item-container">
    <a href="../docs/ebft.html" class="sidebar-item-text sidebar-link active" aria-current="page">
      <span class="menu-text">EBFT Training</span></a>
  </div>
</li>
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/vllm_serving.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">vLLM Serving for GRPO Training</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Reward Modelling</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Learning Rate Groups</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">LoRA Optimizations</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset_loading.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Dataset Loading</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/qat.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Quantization Aware Training (QAT)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/quantize.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Quantization with torchao</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/optimizations.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Optimizations Guide</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Core Concepts</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-8" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Dataset Preprocessing</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/streaming.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Streaming Datasets</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/mixed_precision.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Mixed Precision Training</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/optimizers.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Optimizers</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/attention.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Attention</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-9" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Advanced Features</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-9" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-9" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">FSDP + QLoRA</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Unsloth</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/torchao.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">PyTorch ao</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Custom Integrations</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/sequence_parallelism.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Sequence Parallelism</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/gradient_checkpointing.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Gradient Checkpointing, Activation Offloading, and Layer Offloading</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/nd_parallelism.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">N-D Parallelism (Beta)</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/expert_quantization.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">MoE Expert Quantization</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="sidebar-item sidebar-item-section">
|
||
<div class="sidebar-item-container">
|
||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-10" role="navigation" aria-expanded="true">
|
||
<span class="menu-text">Troubleshooting</span></a>
|
||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-10" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||
<i class="bi bi-chevron-right ms-2"></i>
|
||
</a>
|
||
</div>
|
||
<ul id="quarto-sidebar-section-10" class="collapse list-unstyled sidebar-section depth1 show">
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">FAQ</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/training_stability.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Training Stability &amp; Debugging</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">Debugging</span></a>
|
||
</div>
|
||
</li>
|
||
<li class="sidebar-item">
|
||
<div class="sidebar-item-container">
|
||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||
<span class="menu-text">NCCL</span></a>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</div>
|
||
</nav>
|
||
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
|
||
<!-- margin-sidebar -->
|
||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||
<nav id="TOC" role="doc-toc" class="toc-active" data-toc-expanded="2">
|
||
<h2 id="toc-title">On this page</h2>
|
||
|
||
<ul>
|
||
<li><a href="#overview" id="toc-overview" class="nav-link active" data-scroll-target="#overview">Overview</a>
|
||
<ul>
|
||
<li><a href="#how-ebft-differs-from-other-rl-methods" id="toc-how-ebft-differs-from-other-rl-methods" class="nav-link" data-scroll-target="#how-ebft-differs-from-other-rl-methods">How EBFT Differs from Other RL Methods</a></li>
|
||
<li><a href="#reward-formulation" id="toc-reward-formulation" class="nav-link" data-scroll-target="#reward-formulation">Reward Formulation</a></li>
|
||
</ul></li>
|
||
<li><a href="#modes" id="toc-modes" class="nav-link" data-scroll-target="#modes">Modes</a>
|
||
<ul>
|
||
<li><a href="#structured-mode-sync" id="toc-structured-mode-sync" class="nav-link" data-scroll-target="#structured-mode-sync">Structured Mode (Sync)</a></li>
|
||
<li><a href="#structured-mode-async" id="toc-structured-mode-async" class="nav-link" data-scroll-target="#structured-mode-async">Structured Mode (Async)</a></li>
|
||
<li><a href="#strided-mode" id="toc-strided-mode" class="nav-link" data-scroll-target="#strided-mode">Strided Mode</a></li>
|
||
</ul></li>
|
||
<li><a href="#quick-start" id="toc-quick-start" class="nav-link" data-scroll-target="#quick-start">Quick Start</a>
|
||
<ul>
|
||
<li><a href="#structured-mode" id="toc-structured-mode" class="nav-link" data-scroll-target="#structured-mode">Structured Mode</a></li>
|
||
<li><a href="#dataset-format" id="toc-dataset-format" class="nav-link" data-scroll-target="#dataset-format">Dataset Format</a></li>
|
||
</ul></li>
|
||
<li><a href="#feature-extraction" id="toc-feature-extraction" class="nav-link" data-scroll-target="#feature-extraction">Feature Extraction</a>
|
||
<ul>
|
||
<li><a href="#feature-layers" id="toc-feature-layers" class="nav-link" data-scroll-target="#feature-layers">Feature Layers</a></li>
|
||
<li><a href="#embed-methods" id="toc-embed-methods" class="nav-link" data-scroll-target="#embed-methods">Embed Methods</a></li>
|
||
<li><a href="#svd-whitening" id="toc-svd-whitening" class="nav-link" data-scroll-target="#svd-whitening">SVD Whitening</a></li>
|
||
<li><a href="#alignment-and-diversity-coefficients" id="toc-alignment-and-diversity-coefficients" class="nav-link" data-scroll-target="#alignment-and-diversity-coefficients">Alignment and Diversity Coefficients</a></li>
|
||
</ul></li>
|
||
<li><a href="#strided-mode-1" id="toc-strided-mode-1" class="nav-link" data-scroll-target="#strided-mode-1">Strided Mode</a>
|
||
<ul>
|
||
<li><a href="#how-block-parallel-generation-works" id="toc-how-block-parallel-generation-works" class="nav-link" data-scroll-target="#how-block-parallel-generation-works">How Block-Parallel Generation Works</a></li>
|
||
<li><a href="#strided-mode-configuration" id="toc-strided-mode-configuration" class="nav-link" data-scroll-target="#strided-mode-configuration">Strided Mode Configuration</a></li>
|
||
<li><a href="#advantage-estimators" id="toc-advantage-estimators" class="nav-link" data-scroll-target="#advantage-estimators">Advantage Estimators</a></li>
|
||
<li><a href="#strided-mode-constraints" id="toc-strided-mode-constraints" class="nav-link" data-scroll-target="#strided-mode-constraints">Strided Mode Constraints</a></li>
|
||
<li><a href="#cross-entropy-loss" id="toc-cross-entropy-loss" class="nav-link" data-scroll-target="#cross-entropy-loss">Cross-Entropy Loss</a></li>
|
||
</ul></li>
|
||
<li><a href="#dataset-formats" id="toc-dataset-formats" class="nav-link" data-scroll-target="#dataset-formats">Dataset Formats</a>
|
||
<ul>
|
||
<li><a href="#built-in-transforms" id="toc-built-in-transforms" class="nav-link" data-scroll-target="#built-in-transforms">Built-In Transforms</a></li>
|
||
<li><a href="#structured-mode-datasets" id="toc-structured-mode-datasets" class="nav-link" data-scroll-target="#structured-mode-datasets">Structured Mode Datasets</a></li>
|
||
<li><a href="#multi-turn-datasets" id="toc-multi-turn-datasets" class="nav-link" data-scroll-target="#multi-turn-datasets">Multi-Turn Datasets</a></li>
|
||
<li><a href="#strided-mode-datasets" id="toc-strided-mode-datasets" class="nav-link" data-scroll-target="#strided-mode-datasets">Strided Mode Datasets</a></li>
|
||
<li><a href="#custom-transforms" id="toc-custom-transforms" class="nav-link" data-scroll-target="#custom-transforms">Custom Transforms</a></li>
|
||
</ul></li>
|
||
<li><a href="#configuration-reference" id="toc-configuration-reference" class="nav-link" data-scroll-target="#configuration-reference">Configuration Reference</a>
|
||
<ul>
|
||
<li><a href="#common-parameters-all-modes" id="toc-common-parameters-all-modes" class="nav-link" data-scroll-target="#common-parameters-all-modes">Common Parameters (All Modes)</a></li>
|
||
<li><a href="#strided-mode-parameters" id="toc-strided-mode-parameters" class="nav-link" data-scroll-target="#strided-mode-parameters">Strided Mode Parameters</a></li>
|
||
<li><a href="#structured-mode-trl-parameters" id="toc-structured-mode-trl-parameters" class="nav-link" data-scroll-target="#structured-mode-trl-parameters">Structured Mode TRL Parameters</a></li>
|
||
<li><a href="#stop-tokens" id="toc-stop-tokens" class="nav-link" data-scroll-target="#stop-tokens">Stop Tokens</a></li>
|
||
<li><a href="#multi-turn-chat-settings" id="toc-multi-turn-chat-settings" class="nav-link" data-scroll-target="#multi-turn-chat-settings">Multi-Turn Chat Settings</a></li>
|
||
</ul></li>
|
||
<li><a href="#monitoring" id="toc-monitoring" class="nav-link" data-scroll-target="#monitoring">Monitoring</a>
|
||
<ul>
|
||
<li><a href="#key-metrics" id="toc-key-metrics" class="nav-link" data-scroll-target="#key-metrics">Key Metrics</a></li>
|
||
<li><a href="#console-log-example" id="toc-console-log-example" class="nav-link" data-scroll-target="#console-log-example">Console Log Example</a></li>
|
||
<li><a href="#troubleshooting" id="toc-troubleshooting" class="nav-link" data-scroll-target="#troubleshooting">Troubleshooting</a></li>
|
||
<li><a href="#feature-network-memory" id="toc-feature-network-memory" class="nav-link" data-scroll-target="#feature-network-memory">Feature Network Memory</a></li>
|
||
</ul></li>
|
||
<li><a href="#examples" id="toc-examples" class="nav-link" data-scroll-target="#examples">Examples</a></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
<!-- main -->
|
||
<main class="content" id="quarto-document-content">
|
||
|
||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multimodal.html">How To Guides</a></li><li class="breadcrumb-item"><a href="../docs/ebft.html">EBFT Training</a></li></ol></nav>
|
||
<div class="quarto-title">
|
||
<h1 class="title">EBFT Training</h1>
|
||
</div>
|
||
|
||
<div>
|
||
<div class="description">
|
||
Energy-Based Fine-Tuning uses feature-matching rewards from internal representations to train language models without external reward functions.
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<div class="quarto-title-meta">
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
|
||
|
||
</header>
|
||
|
||
|
||
<section id="overview" class="level2">
|
||
<h2 class="anchored" data-anchor-id="overview">Overview</h2>
|
||
<p>Energy-Based Fine-Tuning (EBFT) is a training method that optimizes language models by matching the <strong>internal feature representations</strong> of generated text to those of ground-truth completions. Instead of relying on external reward models or hand-crafted reward functions, EBFT extracts hidden states from intermediate layers of a frozen copy of the model and uses cosine similarity between generated and reference features as the reward signal.</p>
|
||
<p>Paper: <a href="https://arxiv.org/abs/2603.12248">“Matching Features, Not Tokens: Energy-Based Fine-Tuning of Language Models”</a> (Jelassi et al., 2026)</p>
|
||
<section id="how-ebft-differs-from-other-rl-methods" class="level3">
|
||
<h3 class="anchored" data-anchor-id="how-ebft-differs-from-other-rl-methods">How EBFT Differs from Other RL Methods</h3>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 19%">
|
||
<col style="width: 33%">
|
||
<col style="width: 23%">
|
||
<col style="width: 23%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Method</th>
|
||
<th>Reward Signal</th>
|
||
<th>Requires</th>
|
||
<th>Best For</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><strong>GRPO</strong></td>
|
||
<td>External reward function(s)</td>
|
||
<td>Custom reward code or reward model</td>
|
||
<td>Tasks with verifiable answers (math, code)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><strong>DPO</strong></td>
|
||
<td>Preference pairs (chosen vs rejected)</td>
|
||
<td>Paired preference data</td>
|
||
<td>Alignment with human preferences</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><strong>EBFT</strong></td>
|
||
<td>Feature similarity to ground truth</td>
|
||
<td>Ground-truth completions</td>
|
||
<td>Any task with reference outputs</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>EBFT’s key advantage is that it needs only ground-truth completions – no reward engineering, no preference annotation, and no reward model training. The model’s own internal representations serve as the reward signal. This makes it particularly effective for:</p>
|
||
<ul>
|
||
<li>Code generation (match features of known-good solutions)</li>
|
||
<li>Instruction following with reference outputs</li>
|
||
<li>Continual pretraining on unstructured text (strided mode)</li>
|
||
<li>Multi-turn dialogue with reference conversations</li>
|
||
</ul>
|
||
</section>
|
||
<section id="reward-formulation" class="level3">
|
||
<h3 class="anchored" data-anchor-id="reward-formulation">Reward Formulation</h3>
|
||
<p>The EBFT reward for each generated completion is:</p>
|
||
<pre><code>reward = alignment_coef * cosine_similarity(gen_features, gt_features)
       - diversity_coef * mean_pairwise_similarity(gen_features)</code></pre>
|
||
<ul>
|
||
<li><strong>Alignment</strong>: How closely the generated output’s internal representations match the ground truth. Higher is better.</li>
|
||
<li><strong>Diversity</strong>: Penalizes generated samples that are too similar to each other (prevents mode collapse). Lower pairwise similarity is better.</li>
|
||
<li><strong>CFM loss</strong> (Cross-Feature Matching): Tracks <code>||mean(gen_features) - gt_features||^2</code> as a diagnostic. This is the quantity that EBFT ultimately minimizes.</li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="modes" class="level2">
|
||
<h2 class="anchored" data-anchor-id="modes">Modes</h2>
|
||
<p>EBFT supports three operational modes, each suited to different use cases.</p>
|
||
<section id="structured-mode-sync" class="level3">
|
||
<h3 class="anchored" data-anchor-id="structured-mode-sync">Structured Mode (Sync)</h3>
|
||
<p>Uses vLLM on a separate GPU for generation, with sequential generate-score-train steps. This is the simplest mode and is recommended for getting started.</p>
|
||
<pre><code>GPU 0: vLLM Server (generates completions, receives weight syncs)
|
||
GPU 1: Trainer (feature extraction, reward computation, GRPO training)</code></pre>
|
||
<p><strong>When to use</strong>: Standard instruction-following or QA datasets where you have prompt/completion pairs. Requires 2 GPUs.</p>
|
||
</section>
|
||
<section id="structured-mode-async" class="level3">
|
||
<h3 class="anchored" data-anchor-id="structured-mode-async">Structured Mode (Async)</h3>
|
||
<p>Same architecture as sync, but overlaps generation of the next batch with training on the current batch. Faster throughput at the cost of slightly stale weights during generation.</p>
|
||
<p><strong>When to use</strong>: Same data as sync mode, but when you want faster training and can tolerate weight staleness (controlled by <code>vllm_sync_interval</code>).</p>
|
||
</section>
|
||
<section id="strided-mode" class="level3">
|
||
<h3 class="anchored" data-anchor-id="strided-mode">Strided Mode</h3>
|
||
<p>Runs entirely on a single GPU with no vLLM dependency. Places anchor points throughout a document and generates short rollouts at each anchor using block-parallel attention patterns.</p>
|
||
<pre><code>Single GPU: Base model + LoRA adapter
|
||
- Strided block-parallel generation (flex_attention)
|
||
- Feature extraction via disable_adapter()
|
||
- No vLLM needed</code></pre>
|
||
<p><strong>When to use</strong>: Unstructured text data (raw code, prose, documents) where there is no natural prompt/completion split. Also works with structured data that includes prompt boundaries. Requires only 1 GPU.</p>
|
||
</section>
|
||
</section>
|
||
<section id="quick-start" class="level2">
|
||
<h2 class="anchored" data-anchor-id="quick-start">Quick Start</h2>
|
||
<section id="structured-mode" class="level3">
|
||
<h3 class="anchored" data-anchor-id="structured-mode">Structured Mode</h3>
|
||
<p>This minimal example fine-tunes Qwen2-0.5B on code data using EBFT with vLLM generation.</p>
|
||
<p><strong>Step 1</strong>: Create a config file <code>ebft_quickstart.yaml</code>:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb4"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> Qwen/Qwen2-0.5B-Instruct</span></span>
|
||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ebft</span></span>
|
||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">feature_layers</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">0.25</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.5</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.75</span><span class="kw">]</span></span>
|
||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">embed_method</span><span class="kw">:</span><span class="at"> last_token</span></span>
|
||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">alignment_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
|
||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">diversity_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
|
||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generations</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
|
||
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_completion_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">256</span></span>
|
||
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">temperature</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.7</span></span>
|
||
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_vllm</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_host</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0.0.0</span></span>
|
||
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_server_port</span><span class="kw">:</span><span class="at"> </span><span class="dv">8000</span></span>
|
||
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_lora_sync</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">vllm_sync_interval</span><span class="kw">:</span><span class="at"> </span><span class="dv">3</span></span>
|
||
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_data_producer</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">async_prefetch</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">scale_rewards</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">loss_type</span><span class="kw">:</span><span class="at"> grpo</span></span>
|
||
<span id="cb4-24"><a href="#cb4-24" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-25"><a href="#cb4-25" aria-hidden="true" tabindex="-1"></a><span class="fu">vllm</span><span class="kw">:</span></span>
|
||
<span id="cb4-26"><a href="#cb4-26" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">gpu_memory_utilization</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span></span>
|
||
<span id="cb4-27"><a href="#cb4-27" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_model_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">1024</span></span>
|
||
<span id="cb4-28"><a href="#cb4-28" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-29"><a href="#cb4-29" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb4-30"><a href="#cb4-30" aria-hidden="true" tabindex="-1"></a><span class="at">  </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> nvidia/OpenCodeInstruct</span></span>
|
||
<span id="cb4-31"><a href="#cb4-31" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ebft_opencode.transform</span></span>
|
||
<span id="cb4-32"><a href="#cb4-32" aria-hidden="true" tabindex="-1"></a><span class="at">    </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train[:500]</span></span>
|
||
<span id="cb4-33"><a href="#cb4-33" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb4-34"><a href="#cb4-34" aria-hidden="true" tabindex="-1"></a><span class="co"># Standard training settings (see getting-started.qmd for details)</span></span>
|
||
<span id="cb4-35"><a href="#cb4-35" aria-hidden="true" tabindex="-1"></a><span class="fu">adapter</span><span class="kw">:</span><span class="at"> lora</span></span>
|
||
<span id="cb4-36"><a href="#cb4-36" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_r</span><span class="kw">:</span><span class="at"> </span><span class="dv">16</span></span>
|
||
<span id="cb4-37"><a href="#cb4-37" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_alpha</span><span class="kw">:</span><span class="at"> </span><span class="dv">32</span></span>
|
||
<span id="cb4-38"><a href="#cb4-38" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_target_linear</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-39"><a href="#cb4-39" aria-hidden="true" tabindex="-1"></a><span class="fu">sequence_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">1024</span></span>
|
||
<span id="cb4-40"><a href="#cb4-40" aria-hidden="true" tabindex="-1"></a><span class="fu">micro_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||
<span id="cb4-41"><a href="#cb4-41" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_accumulation_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span></span>
|
||
<span id="cb4-42"><a href="#cb4-42" aria-hidden="true" tabindex="-1"></a><span class="fu">max_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">20</span></span>
|
||
<span id="cb4-43"><a href="#cb4-43" aria-hidden="true" tabindex="-1"></a><span class="fu">learning_rate</span><span class="kw">:</span><span class="at"> </span><span class="fl">5.0e-6</span></span>
|
||
<span id="cb4-44"><a href="#cb4-44" aria-hidden="true" tabindex="-1"></a><span class="fu">bf16</span><span class="kw">:</span><span class="at"> auto</span></span>
|
||
<span id="cb4-45"><a href="#cb4-45" aria-hidden="true" tabindex="-1"></a><span class="fu">flash_attention</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-46"><a href="#cb4-46" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb4-47"><a href="#cb4-47" aria-hidden="true" tabindex="-1"></a><span class="fu">output_dir</span><span class="kw">:</span><span class="at"> ./outputs/ebft-quickstart</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p><strong>Step 2</strong>: Start vLLM on GPU 0:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb5"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>0 <span class="ex">axolotl</span> vllm-serve ebft_quickstart.yaml</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p><strong>Step 3</strong>: Wait approximately 30 seconds for vLLM to initialize, then start training on GPU 1:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb6"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>1 <span class="ex">axolotl</span> train ebft_quickstart.yaml</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<div class="callout callout-style-default callout-important callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Important
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>The <code>micro_batch_size</code> must be divisible by <code>num_generations</code>. For example, with <code>num_generations: 4</code>, valid values are 4, 8, 12, etc.</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="dataset-format" class="level3">
|
||
<h3 class="anchored" data-anchor-id="dataset-format">Dataset Format</h3>
|
||
<p>Structured mode datasets must produce two fields after the transform:</p>
|
||
<ul>
|
||
<li><code>prompt</code>: Either a string or a list of chat messages (<code>[{"role": "user", "content": "..."}]</code>)</li>
|
||
<li><code>ground_truth</code>: A string containing the reference completion</li>
|
||
</ul>
|
||
<p>Example raw dataset row:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb7"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">{</span></span>
|
||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"input"</span><span class="fu">:</span> <span class="st">"Write a function to compute fibonacci numbers."</span><span class="fu">,</span></span>
|
||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"output"</span><span class="fu">:</span> <span class="st">"def fibonacci(n):</span><span class="ch">\n</span><span class="st">    if n &lt;= 1:</span><span class="ch">\n</span><span class="st">        return n</span><span class="ch">\n</span><span class="st">    return fibonacci(n-1) + fibonacci(n-2)"</span></span>
|
||
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="fu">}</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>The <code>ebft_opencode.transform</code> converts this to the required <code>{prompt, ground_truth}</code> format automatically.</p>
|
||
</section>
|
||
</section>
|
||
<section id="feature-extraction" class="level2">
|
||
<h2 class="anchored" data-anchor-id="feature-extraction">Feature Extraction</h2>
|
||
<p>EBFT extracts hidden states from intermediate transformer layers and pools them into per-sequence embeddings. These embeddings are compared between generated and ground-truth completions to compute rewards.</p>
|
||
<section id="feature-layers" class="level3">
|
||
<h3 class="anchored" data-anchor-id="feature-layers">Feature Layers</h3>
|
||
<p>The <code>feature_layers</code> parameter specifies which layers to extract, as fractions of total model depth:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb8"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">feature_layers</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">0.25</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.5</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.75</span><span class="kw">]</span><span class="co"> # Quarter, middle, three-quarter depth</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
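<p>For a 32-layer model, this extracts layers 8, 16, and 24. The hidden states from all selected layers are concatenated along the feature dimension, producing embeddings of size <code>num_layers * hidden_dim</code>, where <code>num_layers</code> is the number of selected feature layers (3 in this example), not the model’s total depth.</p>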
|
||
<div class="callout callout-style-default callout-tip callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Tip
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>Using multiple layers captures both low-level syntactic features (early layers) and high-level semantic features (later layers). The default <code>[0.25, 0.5, 0.75]</code> works well across model sizes.</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="embed-methods" class="level3">
|
||
<h3 class="anchored" data-anchor-id="embed-methods">Embed Methods</h3>
|
||
<p>The <code>embed_method</code> controls how per-token hidden states are pooled into a single vector per sequence:</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 19%">
|
||
<col style="width: 31%">
|
||
<col style="width: 31%">
|
||
<col style="width: 17%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Method</th>
|
||
<th>Description</th>
|
||
<th>Output Shape</th>
|
||
<th>Notes</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>last_token</code></td>
|
||
<td>Hidden state at the last non-padding token</td>
|
||
<td><code>(B, D)</code></td>
|
||
<td>Default. Good for autoregressive models where the last token summarizes the sequence.</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>mean_pooling</code></td>
|
||
<td>Mean of all non-padding token states</td>
|
||
<td><code>(B, D)</code></td>
|
||
<td>Considers the entire sequence equally.</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>completion_mean</code></td>
|
||
<td>Mean over completion tokens only (excludes prompt)</td>
|
||
<td><code>(B, D)</code></td>
|
||
<td>Focuses reward signal on generated content. Requires prompt length information.</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>concat</code></td>
|
||
<td>Concatenation of states at 25%, 50%, 75% positions</td>
|
||
<td><code>(B, 3*D)</code></td>
|
||
<td>Captures positional structure. Higher dimensional.</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb9"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">embed_method</span><span class="kw">:</span><span class="at"> completion_mean</span><span class="co"> # Focus on completion features</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
<section id="svd-whitening" class="level3">
|
||
<h3 class="anchored" data-anchor-id="svd-whitening">SVD Whitening</h3>
|
||
<p>Whitening decorrelates the feature dimensions so that no single direction dominates the feature-matching loss. The whitening transform is computed via SVD on the generated embeddings, and the same transform is then applied to the ground-truth embeddings.</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb10"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_whitening</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>When whitening is enabled, the reward computation applies a whitening matrix <code>W = U @ diag(1/S) @ U^T</code> derived from the SVD of generated embeddings. This ensures all feature dimensions contribute equally to the alignment reward.</p>
|
||
<div class="callout callout-style-default callout-note callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Note
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>Singular values scale with <code>sqrt(batch_size)</code>, so reward magnitudes are batch-size dependent. This is acceptable because the number of samples per prompt (<code>n_samples_per_prompt</code> or <code>num_generations</code>) is fixed during training.</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="alignment-and-diversity-coefficients" class="level3">
|
||
<h3 class="anchored" data-anchor-id="alignment-and-diversity-coefficients">Alignment and Diversity Coefficients</h3>
|
||
<p>The two reward components are weighted by coefficients:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb11"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">alignment_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # Weight for cosine similarity with ground truth</span></span>
|
||
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">diversity_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # Weight for pairwise similarity penalty</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>Both values are scaled by 2 internally (per paper equation 7). The final reward per sample is:</p>
|
||
<pre><code>reward_j = 2 * alignment_coef * cos(gen_j, gt)
         - 2 * diversity_coef * (1/(n-1)) * sum_{j' != j} dot(gen_j, gen_j')</code></pre>
|
||
<p>Setting <code>diversity_coef: 0.0</code> disables the diversity penalty entirely, which may be appropriate when <code>num_generations</code> is small (e.g., 2).</p>
|
||
</section>
|
||
</section>
|
||
<section id="strided-mode-1" class="level2">
|
||
<h2 class="anchored" data-anchor-id="strided-mode-1">Strided Mode</h2>
|
||
<p>Strided mode is designed for training on unstructured text data where there is no natural prompt/completion boundary. Instead of generating full completions with vLLM, it places <strong>anchor points</strong> at regular intervals throughout each document and generates short rollouts at each anchor using block-parallel attention.</p>
|
||
<section id="how-block-parallel-generation-works" class="level3">
|
||
<h3 class="anchored" data-anchor-id="how-block-parallel-generation-works">How Block-Parallel Generation Works</h3>
|
||
<p>Given a document of length <code>S</code> tokens:</p>
|
||
<ol type="1">
|
||
<li><strong>Anchor placement</strong>: Starting at position <code>anchor_offset</code>, place anchors every <code>stride</code> tokens. Each anchor defines a block.</li>
|
||
<li><strong>Context window</strong>: Each block sees <code>context_length</code> tokens of preceding context from the original document.</li>
|
||
<li><strong>Generation</strong>: At each anchor, generate <code>generate_max_len</code> tokens autoregressively, conditioned only on the context window.</li>
|
||
<li><strong>Parallelism</strong>: All blocks are processed in a single forward pass using a specialized attention mask that prevents information leakage between blocks.</li>
|
||
</ol>
|
||
<pre><code>Document: [tok0, tok1, ..., tok_{S-1}]
              |          |          |
           anchor_0   anchor_1   anchor_2
              |          |          |
          [ctx][gen] [ctx][gen] [ctx][gen]</code></pre>
|
||
<p>The attention mask ensures:</p>
|
||
<ul>
|
||
<li>Prompt tokens use standard causal attention</li>
|
||
<li>Each generated block attends to its own context window and its own preceding generated tokens</li>
|
||
<li>Blocks do not attend to each other’s generated tokens</li>
|
||
</ul>
|
||
<p>When <code>flex_attention</code> is available (PyTorch >= 2.5), the mask is compiled into efficient fused kernels. Otherwise, a dense 4D attention mask is used as a fallback.</p>
|
||
</section>
|
||
<section id="strided-mode-configuration" class="level3">
|
||
<h3 class="anchored" data-anchor-id="strided-mode-configuration">Strided Mode Configuration</h3>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb14"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">base_model</span><span class="kw">:</span><span class="at"> meta-llama/Llama-3.2-1B</span></span>
|
||
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rl</span><span class="kw">:</span><span class="at"> ebft</span></span>
|
||
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">mode</span><span class="kw">:</span><span class="at"> strided</span></span>
|
||
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">stride</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span><span class="co"> # Tokens between anchor points</span></span>
|
||
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">context_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span><span class="co"> # Context window per block</span></span>
|
||
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generate_max_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span><span class="co"> # Tokens to generate per block</span></span>
|
||
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">n_samples_per_prompt</span><span class="kw">:</span><span class="at"> </span><span class="dv">4</span><span class="co"> # Independent rollouts per document</span></span>
|
||
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">temperature</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.6</span></span>
|
||
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">feature_layers</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="fl">0.25</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.5</span><span class="kw">,</span><span class="at"> </span><span class="fl">0.75</span><span class="kw">]</span></span>
|
||
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">embed_method</span><span class="kw">:</span><span class="at"> last_token</span></span>
|
||
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_whitening</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">alignment_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
|
||
<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">diversity_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span></span>
|
||
<span id="cb14-16"><a href="#cb14-16" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">rl_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # RL policy gradient loss weight</span></span>
|
||
<span id="cb14-17"><a href="#cb14-17" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ce_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.03</span><span class="co"> # Cross-entropy loss on GT tokens</span></span>
|
||
<span id="cb14-18"><a href="#cb14-18" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">advantage_estimator</span><span class="kw">:</span><span class="at"> rloo</span><span class="co"> # rloo, group_norm, or reinforce</span></span>
|
||
<span id="cb14-19"><a href="#cb14-19" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">min_completion_prefix</span><span class="kw">:</span><span class="at"> </span><span class="dv">8</span><span class="co"> # Skip anchors in prompt region</span></span>
|
||
<span id="cb14-20"><a href="#cb14-20" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-21"><a href="#cb14-21" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb14-22"><a href="#cb14-22" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> nvidia/OpenCodeInstruct</span></span>
|
||
<span id="cb14-23"><a href="#cb14-23" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ebft_strided_structured.transform</span></span>
|
||
<span id="cb14-24"><a href="#cb14-24" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train[:1%]</span></span>
|
||
<span id="cb14-25"><a href="#cb14-25" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-26"><a href="#cb14-26" aria-hidden="true" tabindex="-1"></a><span class="fu">sequence_len</span><span class="kw">:</span><span class="at"> </span><span class="dv">2048</span></span>
|
||
<span id="cb14-27"><a href="#cb14-27" aria-hidden="true" tabindex="-1"></a><span class="fu">micro_batch_size</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
|
||
<span id="cb14-28"><a href="#cb14-28" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_accumulation_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||
<span id="cb14-29"><a href="#cb14-29" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-30"><a href="#cb14-30" aria-hidden="true" tabindex="-1"></a><span class="fu">adapter</span><span class="kw">:</span><span class="at"> lora</span></span>
|
||
<span id="cb14-31"><a href="#cb14-31" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_r</span><span class="kw">:</span><span class="at"> </span><span class="dv">16</span></span>
|
||
<span id="cb14-32"><a href="#cb14-32" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_alpha</span><span class="kw">:</span><span class="at"> </span><span class="dv">32</span></span>
|
||
<span id="cb14-33"><a href="#cb14-33" aria-hidden="true" tabindex="-1"></a><span class="fu">lora_target_linear</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb14-34"><a href="#cb14-34" aria-hidden="true" tabindex="-1"></a></span>
|
||
<span id="cb14-35"><a href="#cb14-35" aria-hidden="true" tabindex="-1"></a><span class="fu">bf16</span><span class="kw">:</span><span class="at"> auto</span></span>
|
||
<span id="cb14-36"><a href="#cb14-36" aria-hidden="true" tabindex="-1"></a><span class="fu">flex_attention</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb14-37"><a href="#cb14-37" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||
<span id="cb14-38"><a href="#cb14-38" aria-hidden="true" tabindex="-1"></a><span class="fu">gradient_checkpointing_kwargs</span><span class="kw">:</span></span>
|
||
<span id="cb14-39"><a href="#cb14-39" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">use_reentrant</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span><span class="co"> # Required with flex_attention</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>Run with a single command (no vLLM needed):</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb15"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="va">CUDA_VISIBLE_DEVICES</span><span class="op">=</span>0 <span class="ex">axolotl</span> train config.yaml</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
<section id="advantage-estimators" class="level3">
|
||
<h3 class="anchored" data-anchor-id="advantage-estimators">Advantage Estimators</h3>
|
||
<p>Strided mode supports three advantage estimation methods:</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 33%">
|
||
<col style="width: 27%">
|
||
<col style="width: 39%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Estimator</th>
|
||
<th>Formula</th>
|
||
<th>Requirements</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>rloo</code></td>
|
||
<td>Leave-one-out baseline: <code>reward_j - mean(rewards_{-j})</code></td>
|
||
<td><code>n_samples_per_prompt >= 2</code></td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>group_norm</code></td>
|
||
<td>Group normalization: <code>(reward_j - mean) / std</code></td>
|
||
<td><code>n_samples_per_prompt >= 2</code></td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>reinforce</code></td>
|
||
<td>Raw reward as advantage (no baseline)</td>
|
||
<td>Works with <code>n_samples_per_prompt = 1</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<div class="callout callout-style-default callout-warning callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Warning
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>When <code>n_samples_per_prompt: 1</code>, the trainer automatically falls back to <code>reinforce</code> and disables the diversity penalty (which requires multiple samples).</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<section id="strided-mode-constraints" class="level3">
|
||
<h3 class="anchored" data-anchor-id="strided-mode-constraints">Strided Mode Constraints</h3>
|
||
<ul>
|
||
<li><strong><code>flex_attention: true</code></strong> is strongly recommended. Without it, dense 4D masks consume significantly more memory.</li>
|
||
<li><strong><code>torch_compile: true</code></strong> must NOT be set. <code>flex_attention</code> compiles its own kernels internally; adding <code>torch_compile</code> causes conflicts and OOM.</li>
|
||
<li><strong>Gradient checkpointing</strong> must use <code>use_reentrant: true</code>. Non-reentrant checkpointing causes <code>CheckpointError</code> with <code>flex_attention</code> block masks.</li>
|
||
<li><strong><code>activation_offloading</code></strong> is incompatible with <code>flex_attention</code>.</li>
|
||
</ul>
|
||
</section>
|
||
<section id="cross-entropy-loss" class="level3">
|
||
<h3 class="anchored" data-anchor-id="cross-entropy-loss">Cross-Entropy Loss</h3>
|
||
<p>Strided mode supports an optional cross-entropy loss term on ground-truth tokens. This acts as a regularizer to prevent the model from drifting too far from the original distribution:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb16"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ebft</span><span class="kw">:</span></span>
|
||
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">ce_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.03</span><span class="co"> # Small CE coefficient</span></span>
|
||
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">rl_coef</span><span class="kw">:</span><span class="at"> </span><span class="fl">1.0</span><span class="co"> # RL loss coefficient</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>The total loss is <code>rl_coef * rl_loss + ce_coef * ce_loss</code>. For structured mode, <code>ce_coef</code> is typically <code>0.0</code> since vLLM generation provides sufficient learning signal.</p>
|
||
</section>
|
||
</section>
|
||
<section id="dataset-formats" class="level2">
|
||
<h2 class="anchored" data-anchor-id="dataset-formats">Dataset Formats</h2>
|
||
<p>EBFT provides several built-in dataset transforms in <code>src/axolotl/prompt_strategies/ebft/</code>.</p>
|
||
<section id="built-in-transforms" class="level3">
|
||
<h3 class="anchored" data-anchor-id="built-in-transforms">Built-In Transforms</h3>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 22%">
|
||
<col style="width: 26%">
|
||
<col style="width: 30%">
|
||
<col style="width: 20%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Transform</th>
|
||
<th>Input Format</th>
|
||
<th>Output Fields</th>
|
||
<th>Use Case</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>ebft_opencode.transform</code></td>
|
||
<td><code>{input, output}</code></td>
|
||
<td><code>{prompt, ground_truth}</code></td>
|
||
<td>OpenCodeInstruct, structured QA</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>ebft_strided_structured.transform</code></td>
|
||
<td><code>{input, output}</code></td>
|
||
<td><code>{input_ids, labels, prompt_length}</code></td>
|
||
<td>Strided mode with structured data</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>ebft_strided_chat.transform</code></td>
|
||
<td><code>{messages: [...]}</code></td>
|
||
<td><code>{input_ids, labels, prompt_length}</code></td>
|
||
<td>Strided mode with chat data</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>ebft_chat_multiturn.transform</code></td>
|
||
<td><code>{messages: [...]}</code></td>
|
||
<td><code>{prompt, ground_truth, remaining_turns}</code></td>
|
||
<td>Multi-turn: first-turn target</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>ebft_chat_multiturn.transform_last_turn</code></td>
|
||
<td><code>{messages: [...]}</code></td>
|
||
<td><code>{prompt, ground_truth}</code></td>
|
||
<td>Multi-turn: last-turn target</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>ebft_chat_multiturn.transform_all_turns</code></td>
|
||
<td><code>{messages: [...]}</code></td>
|
||
<td><code>{prompt[], ground_truth[]}</code></td>
|
||
<td>Multi-turn: one example per turn</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>ebft_reasoning.transform</code></td>
|
||
<td><code>{messages: [...]}</code> (with <code>&lt;think&gt;</code>)</td>
|
||
<td><code>{prompt, ground_truth}</code></td>
|
||
<td>Reasoning/thinking datasets</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="structured-mode-datasets" class="level3">
|
||
<h3 class="anchored" data-anchor-id="structured-mode-datasets">Structured Mode Datasets</h3>
|
||
<p>For structured (sync/async) mode, the transform must produce <code>prompt</code> and <code>ground_truth</code> fields:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb17"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> nvidia/OpenCodeInstruct</span></span>
|
||
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ebft_opencode.transform</span></span>
|
||
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train[:500]</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
<section id="multi-turn-datasets" class="level3">
|
||
<h3 class="anchored" data-anchor-id="multi-turn-datasets">Multi-Turn Datasets</h3>
|
||
<p>Multi-turn transforms extract conversation data for sequential rollout. The <code>transform</code> variant targets the first assistant turn, <code>transform_last_turn</code> targets the final turn, and <code>transform_all_turns</code> emits one example per turn:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb18"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> your/multiturn-dataset</span></span>
|
||
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ebft_chat_multiturn.transform</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>When <code>remaining_turns</code> is present in the dataset output, the trainer performs sequential rollouts: it generates the first assistant turn with vLLM, then continues generating subsequent turns by building up the conversation history.</p>
|
||
</section>
|
||
<section id="strided-mode-datasets" class="level3">
|
||
<h3 class="anchored" data-anchor-id="strided-mode-datasets">Strided Mode Datasets</h3>
|
||
<p>Strided transforms tokenize the full document and produce <code>input_ids</code>, <code>labels</code>, and <code>prompt_length</code>:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb19"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> nvidia/OpenCodeInstruct</span></span>
|
||
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> ebft_strided_structured.transform</span></span>
|
||
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span><span class="at"> train[:1%]</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
<section id="custom-transforms" class="level3">
|
||
<h3 class="anchored" data-anchor-id="custom-transforms">Custom Transforms</h3>
|
||
<p>To use your own dataset format, write a transform function:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> transform(cfg, <span class="op">**</span>kwargs):</span>
|
||
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a> <span class="kw">def</span> transform_fn(example, tokenizer<span class="op">=</span><span class="va">None</span>):</span>
|
||
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> {</span>
|
||
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a> <span class="st">"prompt"</span>: [{<span class="st">"role"</span>: <span class="st">"user"</span>, <span class="st">"content"</span>: example[<span class="st">"question"</span>]}],</span>
|
||
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a> <span class="st">"ground_truth"</span>: example[<span class="st">"answer"</span>],</span>
|
||
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a> }</span>
|
||
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> transform_fn, {<span class="st">"remove_columns"</span>: <span class="st">"__all__"</span>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
<p>The <code>"__all__"</code> sentinel removes all original dataset columns after the mapping step. Reference this transform in your config:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb21"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">datasets</span><span class="kw">:</span></span>
|
||
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> your/dataset</span></span>
|
||
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> your_module.transform</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
</section>
|
||
<section id="configuration-reference" class="level2">
|
||
<h2 class="anchored" data-anchor-id="configuration-reference">Configuration Reference</h2>
|
||
<section id="common-parameters-all-modes" class="level3">
|
||
<h3 class="anchored" data-anchor-id="common-parameters-all-modes">Common Parameters (All Modes)</h3>
|
||
<p>These parameters are set under the <code>ebft:</code> key in the YAML config.</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 28%">
|
||
<col style="width: 15%">
|
||
<col style="width: 23%">
|
||
<col style="width: 33%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Parameter</th>
|
||
<th>Type</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>mode</code></td>
|
||
<td><code>"structured"</code> or <code>"strided"</code></td>
|
||
<td><code>"structured"</code></td>
|
||
<td>EBFT operating mode</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>feature_layers</code></td>
|
||
<td><code>list[float]</code></td>
|
||
<td><code>[0.25, 0.5, 0.75]</code></td>
|
||
<td>Fractional layer depths for feature extraction</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>embed_method</code></td>
|
||
<td><code>string</code></td>
|
||
<td><code>"last_token"</code></td>
|
||
<td>Pooling method: <code>last_token</code>, <code>mean_pooling</code>, <code>completion_mean</code>, or <code>concat</code></td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>use_whitening</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>false</code></td>
|
||
<td>Apply SVD whitening to feature embeddings before reward computation</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>alignment_coef</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>1.0</code></td>
|
||
<td>Weight for alignment reward (cosine similarity with ground truth)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>diversity_coef</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>1.0</code></td>
|
||
<td>Weight for diversity penalty (pairwise dot product between samples)</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>ce_coef</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>0.0</code></td>
|
||
<td>Cross-entropy loss coefficient on ground-truth tokens</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>adaptive_max_tokens</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>true</code></td>
|
||
<td>Dynamically set vLLM <code>max_tokens</code> based on ground-truth length (structured mode)</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>gt_length_multiplier</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>1.5</code></td>
|
||
<td>Multiplier for ground-truth token count when computing adaptive max tokens (min 0.1)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="strided-mode-parameters" class="level3">
|
||
<h3 class="anchored" data-anchor-id="strided-mode-parameters">Strided Mode Parameters</h3>
|
||
<p>These additional parameters apply only when <code>mode: strided</code>.</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 28%">
|
||
<col style="width: 15%">
|
||
<col style="width: 23%">
|
||
<col style="width: 33%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Parameter</th>
|
||
<th>Type</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>stride</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>8</code></td>
|
||
<td>Number of tokens between anchor points (must be >= 1)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>context_length</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>8</code></td>
|
||
<td>Context window size for each generated block (must be >= 1)</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>generate_max_len</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>8</code></td>
|
||
<td>Number of tokens to generate per block (must be >= 1)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>n_samples_per_prompt</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>4</code></td>
|
||
<td>Number of independent rollouts per document (must be >= 1)</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>temperature</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>0.6</code></td>
|
||
<td>Sampling temperature for strided generation</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>top_p</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>1.0</code></td>
|
||
<td>Top-p nucleus sampling threshold</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>rl_coef</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>1.0</code></td>
|
||
<td>RL policy gradient loss coefficient</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>advantage_estimator</code></td>
|
||
<td><code>string</code></td>
|
||
<td><code>"rloo"</code></td>
|
||
<td>Advantage estimation method: <code>rloo</code>, <code>group_norm</code>, or <code>reinforce</code></td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>min_completion_prefix</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>0</code></td>
|
||
<td>Minimum tokens into the completion span before placing anchors</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="structured-mode-trl-parameters" class="level3">
|
||
<h3 class="anchored" data-anchor-id="structured-mode-trl-parameters">Structured Mode TRL Parameters</h3>
|
||
<p>These are set under the <code>trl:</code> key and control the GRPO training loop.</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 28%">
|
||
<col style="width: 15%">
|
||
<col style="width: 23%">
|
||
<col style="width: 33%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Parameter</th>
|
||
<th>Type</th>
|
||
<th>Default</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>num_generations</code></td>
|
||
<td><code>int</code></td>
|
||
<td>–</td>
|
||
<td>Number of completions generated per prompt</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>max_completion_length</code></td>
|
||
<td><code>int</code></td>
|
||
<td>–</td>
|
||
<td>Maximum tokens per generated completion</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>temperature</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>0.7</code></td>
|
||
<td>Sampling temperature for vLLM generation</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>use_vllm</code></td>
|
||
<td><code>bool</code></td>
|
||
<td>–</td>
|
||
<td>Enable vLLM generation backend</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>vllm_lora_sync</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>false</code></td>
|
||
<td>Sync LoRA adapters via filesystem (recommended)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>vllm_sync_interval</code></td>
|
||
<td><code>int</code></td>
|
||
<td><code>1</code></td>
|
||
<td>Steps between weight syncs to vLLM</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>use_data_producer</code></td>
|
||
<td><code>bool</code></td>
|
||
<td>–</td>
|
||
<td>Required for sync mode with LoRA sync</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>async_prefetch</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>false</code></td>
|
||
<td>Enable async generation (overlaps with training)</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>streaming_partial_batch</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>false</code></td>
|
||
<td>Score groups incrementally (async mode)</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>skip_zero_advantage_batches</code></td>
|
||
<td><code>bool</code></td>
|
||
<td><code>false</code></td>
|
||
<td>Skip micro-batches where all advantages are zero</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>scale_rewards</code></td>
|
||
<td><code>bool</code></td>
|
||
<td>–</td>
|
||
<td>Normalize rewards within each prompt group</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>loss_type</code></td>
|
||
<td><code>string</code></td>
|
||
<td><code>"grpo"</code></td>
|
||
<td>Loss type for policy optimization</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>epsilon</code></td>
|
||
<td><code>float</code></td>
|
||
<td><code>0.2</code></td>
|
||
<td>Clipping parameter for importance sampling</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="stop-tokens" class="level3">
|
||
<h3 class="anchored" data-anchor-id="stop-tokens">Stop Tokens</h3>
|
||
<p>vLLM needs explicit stop token IDs for generation. Common configurations:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generation_kwargs</span><span class="kw">:</span></span>
|
||
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">stop_token_ids</span><span class="kw">:</span><span class="at"> </span><span class="kw">[</span><span class="dv">151645</span><span class="kw">,</span><span class="at"> </span><span class="dv">151643</span><span class="kw">]</span><span class="co"> # Qwen: <|im_end|>, <|endoftext|></span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
<section id="multi-turn-chat-settings" class="level3">
|
||
<h3 class="anchored" data-anchor-id="multi-turn-chat-settings">Multi-Turn Chat Settings</h3>
|
||
<p>For multi-turn conversations with Qwen3.5, disable thinking mode to prevent <code>&lt;think&gt;</code> tags in completions:</p>
|
||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb23"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">trl</span><span class="kw">:</span></span>
|
||
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">chat_template_kwargs</span><span class="kw">:</span></span>
|
||
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">enable_thinking</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||
</section>
|
||
</section>
|
||
<section id="monitoring" class="level2">
|
||
<h2 class="anchored" data-anchor-id="monitoring">Monitoring</h2>
|
||
<section id="key-metrics" class="level3">
|
||
<h3 class="anchored" data-anchor-id="key-metrics">Key Metrics</h3>
|
||
<p>EBFT logs several custom metrics to wandb and the training console. Here is what to watch for:</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 21%">
|
||
<col style="width: 36%">
|
||
<col style="width: 42%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Metric</th>
|
||
<th>Healthy Range</th>
|
||
<th>Interpretation</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>ebft/alignment</code></td>
|
||
<td>0.3 – 0.9, trending upward</td>
|
||
<td>Cosine similarity between generated and ground-truth features. Higher means the model is learning to produce representations that match the reference.</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>ebft/diversity</code></td>
|
||
<td>0.01 – 0.1</td>
|
||
<td>Mean pairwise similarity between different generations for the same prompt. Values above 1.0 indicate mode collapse.</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>ebft/cfm_loss</code></td>
|
||
<td>Below 10, trending downward</td>
|
||
<td>Cross-Feature Matching loss. This is the core quantity being minimized. Consistently above 100 indicates instability.</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>ebft/reward</code></td>
|
||
<td>Trending upward (may start negative)</td>
|
||
<td>Combined reward signal. If stuck at -1.0, the diversity penalty is dominating alignment.</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>grad_norm</code></td>
|
||
<td>0.1 – 3.0</td>
|
||
<td>Gradient magnitude. Values of 0.0 indicate zero-advantage skip (normal). Values above 10 suggest instability.</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>entropy</code></td>
|
||
<td>0.05 – 0.5</td>
|
||
<td>Policy entropy. Values below 0.01 suggest mode collapse.</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>IS ratio min</code></td>
|
||
<td>Above 0.1</td>
|
||
<td>Importance sampling ratio minimum. Near-zero values mean the policy is too far off-policy; sync weights to vLLM more often by decreasing <code>vllm_sync_interval</code>.</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="console-log-example" class="level3">
|
||
<h3 class="anchored" data-anchor-id="console-log-example">Console Log Example</h3>
|
||
<p>During training, you will see periodic EBFT reward logs:</p>
|
||
<pre><code>ebft reward | align +0.412 ^ | divers +0.023 v | cfm 4.231 v | reward +0.389 ^</code></pre>
|
||
<p>The arrows indicate the desired direction: alignment and reward should trend upward, while diversity and CFM loss should trend downward.</p>
|
||
</section>
|
||
<section id="troubleshooting" class="level3">
|
||
<h3 class="anchored" data-anchor-id="troubleshooting">Troubleshooting</h3>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 33%">
|
||
<col style="width: 48%">
|
||
<col style="width: 18%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Symptom</th>
|
||
<th>Likely Cause</th>
|
||
<th>Fix</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>alignment</code> stays below 0.1</td>
|
||
<td>Feature layers not capturing useful information</td>
|
||
<td>Try different <code>feature_layers</code> or <code>embed_method</code></td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>diversity</code> exceeds 1.0</td>
|
||
<td>Mode collapse – generations are too similar</td>
|
||
<td>Increase <code>diversity_coef</code> or <code>temperature</code></td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>reward</code> stuck at -1.0</td>
|
||
<td>Diversity penalty dominates alignment</td>
|
||
<td>Reduce <code>diversity_coef</code> or increase <code>alignment_coef</code></td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>grad_norm</code> consistently 0.0</td>
|
||
<td>All micro-batches have zero advantage</td>
|
||
<td>Increase <code>num_generations</code> or check data quality</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>CheckpointError</code> in strided mode</td>
|
||
<td>Incompatible gradient checkpointing settings</td>
|
||
<td>Set <code>use_reentrant: true</code> in <code>gradient_checkpointing_kwargs</code></td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td>OOM during training</td>
|
||
<td>Logits tensor too large</td>
|
||
<td>Reduce <code>sequence_len</code> or <code>micro_batch_size</code>; strided mode uses chunked lm_head to mitigate this</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td>vLLM 500 errors</td>
|
||
<td><code>truncate_prompt_tokens</code> not supported</td>
|
||
<td>Ensure you are using <code>axolotl vllm-serve</code> (not <code>trl vllm-serve</code>)</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</section>
|
||
<section id="feature-network-memory" class="level3">
|
||
<h3 class="anchored" data-anchor-id="feature-network-memory">Feature Network Memory</h3>
|
||
<p>In PEFT (LoRA) mode, the feature network shares base weights with the actor model by using the <code>disable_adapter()</code> context manager. This saves an entire model copy in VRAM (approximately 1–16 GB depending on model size). For non-PEFT training, a separate frozen deepcopy is created.</p>
|
||
<div class="callout callout-style-default callout-note callout-titled">
|
||
<div class="callout-header d-flex align-content-center">
|
||
<div class="callout-icon-container">
|
||
<i class="callout-icon"></i>
|
||
</div>
|
||
<div class="callout-title-container flex-fill">
|
||
Note
|
||
</div>
|
||
</div>
|
||
<div class="callout-body-container callout-body">
|
||
<p>The <code>disable_adapter()</code> approach relies on an invariant: <code>merge_adapter()</code> is never called on the base weights. All weight sync paths (LoRA sync, HTTP, NCCL) compute merged weights as new tensors or save the adapter to the filesystem, leaving base weights unmodified.</p>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="examples" class="level2">
|
||
<h2 class="anchored" data-anchor-id="examples">Examples</h2>
|
||
<p>Complete example configurations are available in <code>examples/ebft/</code>:</p>
|
||
<table class="caption-top table">
|
||
<colgroup>
|
||
<col style="width: 23%">
|
||
<col style="width: 20%">
|
||
<col style="width: 17%">
|
||
<col style="width: 38%">
|
||
</colgroup>
|
||
<thead>
|
||
<tr class="header">
|
||
<th>Config</th>
|
||
<th>Model</th>
|
||
<th>Mode</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="odd">
|
||
<td><code>llama-1b-ebft-strided-structured.yaml</code></td>
|
||
<td>Llama 3.2 1B</td>
|
||
<td>Strided</td>
|
||
<td>Single-GPU strided training on code data</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>qwen3-4b-ebft-structured.yaml</code></td>
|
||
<td>Qwen3 4B</td>
|
||
<td>Structured (sync)</td>
|
||
<td>Two-GPU structured training</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>qwen3-4b-ebft-structured-async.yaml</code></td>
|
||
<td>Qwen3 4B</td>
|
||
<td>Structured (async)</td>
|
||
<td>Two-GPU async training with prefetch</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>qwen3-8b-ebft-structured.yaml</code></td>
|
||
<td>Qwen3 8B</td>
|
||
<td>Structured (sync)</td>
|
||
<td>Two-GPU structured training for larger model</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>qwen35-4b-ebft-structured.yaml</code></td>
|
||
<td>Qwen3.5 4B</td>
|
||
<td>Structured (sync)</td>
|
||
<td>Two-GPU with Qwen3.5</td>
|
||
</tr>
|
||
<tr class="even">
|
||
<td><code>qwen35-4b-ebft-structured-async.yaml</code></td>
|
||
<td>Qwen3.5 4B</td>
|
||
<td>Structured (async)</td>
|
||
<td>Two-GPU async with Qwen3.5</td>
|
||
</tr>
|
||
<tr class="odd">
|
||
<td><code>qwen35-9b-ebft-structured.yaml</code></td>
|
||
<td>Qwen3.5 9B</td>
|
||
<td>Structured (sync)</td>
|
||
<td>Two-GPU structured for 9B model</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
|
||
|
||
</section>
|
||
|
||
</main> <!-- /main -->
|
||
<script id="quarto-html-after-body" type="application/javascript">
|
||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||
// Add a hover anchor link to the right of every element with the `anchored` class.
// "\uE9CB" is the link glyph of the AnchorJS icon font; it is written as an escape
// because the raw private-use character is easily lost, and an empty icon leaves
// the generated anchor links with no visible content.
const icon = "\uE9CB";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
  placement: 'right',
  icon: icon
};
anchorJS.add('.anchored');
|
||
// Reports whether `el` carries at least one class whose name starts with
// "code-annotation-".
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
|
||
// ClipboardJS "success" handler. Flags the triggering button as "Copied!" (adding a
// manually triggered Bootstrap tooltip when `window.bootstrap` exists), restores the
// button after one second, and clears the text selection made by the copy.
const onCopySuccess = function(e) {
  const copyButton = e.trigger;

  // don't keep focus on the button
  copyButton.blur();

  // flash the "checked" state and remember the title so it can be restored
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");

  let tooltip;
  if (window.bootstrap) {
    copyButton.setAttribute("data-bs-toggle", "tooltip");
    copyButton.setAttribute("data-bs-placement", "left");
    copyButton.setAttribute("data-bs-title", "Copied!");
    const tooltipOptions = {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8]
    };
    tooltip = new bootstrap.Tooltip(copyButton, tooltipOptions);
    tooltip.show();
  }

  const restoreButton = function() {
    if (tooltip) {
      tooltip.hide();
      copyButton.removeAttribute("data-bs-title");
      copyButton.removeAttribute("data-bs-toggle");
      copyButton.removeAttribute("data-bs-placement");
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);

  // clear code selection
  e.clearSelection();
}
|
||
// Returns the text to place on the clipboard for a copy button: the text of the
// <code> element found in a clone of the button's parent, with code-annotation
// child elements removed. Working on a clone leaves the page untouched.
const getTextToCopy = function(trigger) {
  const outerScaffold = trigger.parentElement.cloneNode(true);
  const codeEl = outerScaffold.querySelector('code');
  // `children` is a live collection: removing an element while iterating it shifts
  // the remaining items down and skips the next one. Iterate a snapshot instead so
  // consecutive annotation elements are all removed.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
|
||
// Wire ClipboardJS to the copy buttons. Buttons outside the embedded source-code
// modal share one instance; when the modal element exists, its buttons get a second
// instance that receives the modal as the `container` option.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);

const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
|
||
// Patterns for hrefs that count as internal: a localhost server, mailto: links,
// and the published documentation site.
var localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
var mailtoRegex = /^mailto:/;
// Written as a regex literal on purpose: inside a string passed to `new RegExp`,
// "\." collapses to "." and would match any character instead of a literal dot.
var filterRegex = /https:\/\/docs\.axolotl\.ai/;

// Returns true when `href` matches any of the internal patterns above.
var isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
|
||
// Inspect non-navigation links. For links that are not internal, put back the href
// stored in `data-original-href` (when present), undoing any rewrite that
// quarto-nav.js may have applied to a link we want to treat as external.
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (!isInternal(link.href) && link.dataset.originalHref !== undefined) {
    link.href = link.dataset.originalHref;
  }
}
|
||
// Attaches a tippy popup to `el` using the shared Quarto hover settings.
// `contentFn`, `onTriggerFn` and `onUntriggerFn` are optional; each one is only
// added to the tippy config (as `content`, `onTrigger`, `onUntrigger`) when truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const config = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // render the popup inside the reference element's parent
    appendTo: function(el) {
      return el.parentElement;
    },
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalHooks = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [optionName, hook] of optionalHooks) {
    if (hook) {
      config[optionName] = hook;
    }
  }
  window.tippy(el, config);
}
|
||
// Footnote references: hovering shows the inner HTML of the referenced note,
// or an empty popup when no element with that id exists.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
noterefs.forEach((ref) => {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // reduce an absolute URL to its hash; anything URL() rejects is kept as-is
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    return note ? note.innerHTML : "";
  });
});
|
||
const xrefs = window.document.querySelectorAll('a.quarto-xref');

// Produces the HTML shown in a cross-reference popup for the element `note`.
// `id` is the target id, or null when the whole page content was fetched.
// - Section targets (null id or an id starting with "sec-") with more than two
//   children are reduced to the first child plus the first following child that is
//   not an empty <p>; otherwise the section's inner HTML is returned.
// - Other targets have any AnchorJS link removed; callouts return their outer HTML,
//   everything else its inner HTML.
// Note that `note` is modified in place (classes stripped, anchor link removed).
const processXRef = (id, note) => {
  // Strip column container classes from the element and all of its descendants
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    if (el.children) {
      for (const child of el.children) {
        stripColumnClz(child);
      }
    }
  }
  // Typeset math in `target` when Quarto exposes a typesetter
  const typesetMath = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  }

  stripColumnClz(note)

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typesetMath(note);
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  const hasManyChildren = note.children && note.children.length > 2;
  if (!hasManyChildren) {
    typesetMath(note);
    return note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  container.appendChild(note.children[0].cloneNode(true));
  for (let i = 1; i < note.children.length; i++) {
    const child = note.children[i];
    const isEmptyParagraph = child.tagName === "P" && child.innerText === "";
    if (!isEmptyParagraph) {
      container.appendChild(child.cloneNode(true));
      break;
    }
  }
  typesetMath(container);
  return container.innerHTML
}
|
||
// Cross-reference links: the popup content is resolved when the hover is triggered.
// The tippy instance is disabled while the content is being resolved and is
// re-enabled and shown afterwards, whether or not content was found.
for (const xref of xrefs) {
  tippyHover(xref, undefined, function(instance) {
    instance.disable();
    const reveal = () => {
      instance.enable();
      instance.show();
    };

    const url = xref.getAttribute('href');
    let hash = undefined;
    if (url.startsWith('#')) {
      hash = url;
    } else {
      try { hash = new URL(url).hash; } catch {}
    }

    if (!hash) {
      // See if we can fetch a full url (with no hash to target)
      // This is a special case and we should probably do some content thinning / targeting
      fetch(url)
        .then(res => res.text())
        .then(html => {
          const htmlDoc = new DOMParser().parseFromString(html, "text/html");
          const note = htmlDoc.querySelector('main.content');
          if (note === null) {
            return;
          }
          // This should only happen for chapter cross references
          // (since there is no id in the URL)
          // remove the first header
          if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
            note.children[0].remove();
          }
          instance.setContent(processXRef(null, note));
        })
        .finally(reveal);
      return;
    }

    const id = hash.replace(/^#\/?/, "");
    const localNote = window.document.getElementById(id);
    if (localNote !== null) {
      // Target is on this page: work on a clone so the page itself is not altered
      try {
        instance.setContent(processXRef(id, localNote.cloneNode(true)));
      } finally {
        reveal();
      }
      return;
    }

    // See if we can fetch this
    fetch(url.split('#')[0])
      .then(res => res.text())
      .then(html => {
        const htmlDoc = new DOMParser().parseFromString(html, "text/html");
        const remoteNote = htmlDoc.getElementById(id);
        if (remoteNote !== null) {
          instance.setContent(processXRef(id, remoteNote));
        }
      })
      .finally(reveal);
  }, function(instance) {
  });
}
|
||
// The annotation <dt> whose code lines are currently highlighted, if any.
let selectedAnnoteEl;

// Builds the CSS selector for the <span> marked with the given
// `data-code-cell` / `data-code-annotation` attribute values.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
|
||
// Highlights the code lines that belong to the annotation item `annoteEl`.
// The item's `data-target-cell` / `data-target-annotation` attributes locate the
// annotation span, whose comma-separated `data-code-lines` names the lines. An
// absolutely positioned highlight div (and a matching div in the cell's gutter) is
// created on demand and sized to cover the first through last listed line.
// On success `annoteEl` is recorded in `selectedAnnoteEl`. When the annotation span,
// its line list, or the line elements cannot be found, nothing is changed instead
// of throwing on a null reference.
const selectCodeLines = (annoteEl) => {
  const targetCell = annoteEl.getAttribute("data-target-cell");
  const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
  const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
  const codeLines = annoteSpan ? annoteSpan.getAttribute("data-code-lines") : null;
  if (!codeLines) {
    return;
  }
  const lineIds = codeLines.split(",").map((line) => {
    return targetCell + "-" + line;
  });

  // compute the span covered by the first through last listed line
  const firstEl = window.document.getElementById(lineIds[0]);
  const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
  if (!firstEl || !lastEl) {
    return;
  }
  const top = firstEl.offsetTop;
  const height = lineIds.length > 1
    ? lastEl.offsetTop + lastEl.offsetHeight - top
    : firstEl.offsetHeight;
  const parent = firstEl.parentElement?.parentElement ?? null;

  if (parent !== null) {
    // cook up a div (if necessary) and position it
    let div = window.document.getElementById("code-annotation-line-highlight");
    if (div === null) {
      div = window.document.createElement("div");
      div.setAttribute("id", "code-annotation-line-highlight");
      div.style.position = 'absolute';
      parent.appendChild(div);
    }
    div.style.top = top - 2 + "px";
    div.style.height = height + 4 + "px";
    div.style.left = 0;

    let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
    if (gutterDiv === null) {
      const codeCell = window.document.getElementById(targetCell);
      const gutter = codeCell ? codeCell.querySelector('.code-annotation-gutter') : null;
      // only create the gutter highlight when the cell actually has a gutter
      if (gutter) {
        gutterDiv = window.document.createElement("div");
        gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
        gutterDiv.style.position = 'absolute';
        gutter.appendChild(gutterDiv);
      }
    }
    if (gutterDiv) {
      gutterDiv.style.top = top - 2 + "px";
      gutterDiv.style.height = height + 4 + "px";
    }
  }
  selectedAnnoteEl = annoteEl;
};
|
||
// Removes the line-highlight div and its gutter counterpart (when present) and
// clears the record of the selected annotation.
const unselectCodeLines = () => {
  for (const elId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const highlight = window.document.getElementById(elId);
    if (highlight) {
      highlight.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
|
||
// Handle positioning of the toggle: on window resize (throttled with a 10 ms
// window), recompute the highlight for the currently selected annotation.
const repositionSelectedAnnotation = () => {
  // NOTE(review): `elRect` is not declared anywhere in this handler, so this
  // assignment targets an outer/global binding; kept as-is.
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(repositionSelectedAnnotation, 10));
|
||
// Wraps `fn` so that the first call runs immediately, while every later call
// (re)schedules a single delayed run `ms` milliseconds out, using the arguments of
// the most recent call. Once that delayed run fires, the wrapper resets and the
// next call goes straight through again.
function throttle(fn, ms) {
  let engaged = false;
  let pendingTimer;
  return (...args) => {
    if (engaged) {
      // a run already happened: replace any pending delayed run with a new one
      if (pendingTimer) clearTimeout(pendingTimer);
      pendingTimer = setTimeout(() => {
        fn.apply(this, args);
        pendingTimer = engaged = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    engaged = true;
  };
}
|
||
// Attach click handler to the DT: clicking an annotation item highlights its code
// lines; clicking the already-selected item removes the highlight again.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;

    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }

    // switch the selection over to the clicked item
    unselectCodeLines();
    const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (activeEl) {
      activeEl.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
|
||
// Walks up from `el` until it reaches a node whose parent has a non-empty
// `data-cites` value. Returns `{ el, cites }`, where `el` is that node (the child of
// the parent carrying the attribute) and `cites` is the value split on spaces.
// Returns undefined when the top of the tree is reached without finding one.
const findCites = (el) => {
  let node = el;
  while (node.parentElement) {
    const cites = node.parentElement.dataset.cites;
    if (cites) {
      return {
        el: node,
        cites: cites.split(' ')
      };
    }
    node = node.parentElement;
  }
  return undefined;
};
|
||
// Citation links: hovering shows the bibliography entries (elements with id
// "ref-<cite>") for every key listed in the nearest `data-cites` ancestor.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      // keys without a matching entry still contribute an empty div
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
|
||
});
|
||
</script>
|
||
</div> <!-- /content -->
|
||
|
||
|
||
|
||
|
||
</body></html> |