Built site for gh-pages
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.7.34">
|
||||
<meta name="generator" content="quarto-1.8.24">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
@@ -67,14 +67,15 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<link href="../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/axe/axe-check.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-befe23ebd2f54d8af2c8a89d1a1611f1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b651517ce65839d647a86e2780455cfb.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../site_libs/bootstrap/bootstrap-e9895ec3143e9833a687747e8d39d226.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<link href="../site_libs/bootstrap/bootstrap-f9d679a32da2b248d4ca48a0e58e089e.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
@@ -124,7 +125,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a href="../index.html" class="navbar-brand navbar-brand-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo light-content">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo dark-content">
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
@@ -150,6 +152,10 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="pt-lg-2 mt-2 text-left sidebar-header">
|
||||
<a href="../index.html" class="sidebar-logo-link">
|
||||
</a>
|
||||
</div>
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
@@ -601,16 +607,16 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<ul>
|
||||
<li>If you are in dev environment</li>
|
||||
</ul>
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">python</span> scripts/cutcrossentropy_install.py <span class="kw">|</span> <span class="fu">sh</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">python</span> scripts/cutcrossentropy_install.py <span class="kw">|</span> <span class="fu">sh</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<ul>
|
||||
<li>If you are installing from pip</li>
|
||||
</ul>
|
||||
<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip3</span> uninstall <span class="at">-y</span> cut-cross-entropy <span class="kw">&&</span> <span class="ex">pip3</span> install <span class="st">"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c6a32c5"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip3</span> uninstall <span class="at">-y</span> cut-cross-entropy <span class="kw">&&</span> <span class="ex">pip3</span> install <span class="st">"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@c6a32c5"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="usage" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage">Usage</h3>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb3"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="supported-models" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="supported-models">Supported Models</h3>
|
||||
@@ -656,7 +662,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
</section>
|
||||
<section id="citation" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="citation">Citation</h3>
|
||||
<div class="sourceCode" id="cb4"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">wijmans2024cut</span>,</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb4"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">wijmans2024cut</span>,</span>
|
||||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">author</span> = {Erik Wijmans and</span>
|
||||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a> Brody Huval and</span>
|
||||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a> Alexander Hertzberg and</span>
|
||||
@@ -666,7 +672,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">journal</span> = {arXiv},</span>
|
||||
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">year</span> = {2024},</span>
|
||||
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">url</span> = {https://arxiv.org/abs/2411.09009},</span>
|
||||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/cut_cross_entropy">here</a></p>
|
||||
</section>
|
||||
</section>
|
||||
@@ -674,8 +680,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<h2 class="anchored" data-anchor-id="densemixer">DenseMixer</h2>
|
||||
<p>See <a href="https://github.com/yaof20/DenseMixer/">DenseMixer</a></p>
|
||||
<p>Simply add the following to your axolotl YAML config:</p>
|
||||
<div class="sourceCode" id="cb5"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.densemixer.DenseMixerPlugin</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb5"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.densemixer.DenseMixerPlugin</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/densemixer">here</a></p>
|
||||
</section>
|
||||
<section id="diffusion-lm-training-plugin-for-axolotl" class="level2">
|
||||
@@ -706,10 +712,10 @@ bidirectional context.</p>
|
||||
<h3 class="anchored" data-anchor-id="basic-configuration">Basic Configuration</h3>
|
||||
<p>You can also modify your existing configs to enable / customize diffusion training.</p>
|
||||
<p>Add the following to your Axolotl config:</p>
|
||||
<div class="sourceCode" id="cb6"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.diffusion.DiffusionPlugin</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb6"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.diffusion.DiffusionPlugin</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>And, configure the nested <code>diffusion</code> block (defaults shown):</p>
|
||||
<div class="sourceCode" id="cb7"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">diffusion</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb7"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">diffusion</span><span class="kw">:</span></span>
|
||||
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">noise_schedule</span><span class="kw">:</span><span class="at"> linear</span><span class="co"> # or "cosine"</span></span>
|
||||
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">min_mask_ratio</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.1</span></span>
|
||||
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">max_mask_ratio</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.9</span></span>
|
||||
@@ -728,7 +734,7 @@ bidirectional context.</p>
|
||||
<span id="cb7-17"><a href="#cb7-17" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">num_generation_samples</span><span class="kw">:</span><span class="at"> </span><span class="dv">3</span></span>
|
||||
<span id="cb7-18"><a href="#cb7-18" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generation_steps</span><span class="kw">:</span><span class="at"> </span><span class="dv">128</span></span>
|
||||
<span id="cb7-19"><a href="#cb7-19" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generation_temperature</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.0</span></span>
|
||||
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generation_max_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">100</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb7-20"><a href="#cb7-20" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">generation_max_length</span><span class="kw">:</span><span class="at"> </span><span class="dv">100</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="supported-models-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="supported-models-1">Supported Models</h3>
|
||||
@@ -749,7 +755,7 @@ create an <a href="https://github.com/axolotl-ai-cloud/axolotl/issues">issue</a>
|
||||
<section id="diffusion-loss" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="diffusion-loss">Diffusion Loss</h3>
|
||||
<p>Loss is computed only on masked tokens with (optional) importance weighting:</p>
|
||||
<div class="sourceCode" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>loss <span class="op">=</span> <span class="bu">sum</span>(cross_entropy(pred, target) <span class="op">/</span> p_mask) <span class="op">/</span> total_tokens</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb8"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>loss <span class="op">=</span> <span class="bu">sum</span>(cross_entropy(pred, target) <span class="op">/</span> p_mask) <span class="op">/</span> total_tokens</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="sample-generation" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="sample-generation">Sample Generation</h3>
|
||||
@@ -820,20 +826,20 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<p>See https://github.com/ironjr/grokfast</p>
|
||||
<section id="usage-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-1">Usage</h3>
|
||||
<div class="sourceCode" id="cb12"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb12"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.grokfast.GrokfastPlugin</span></span>
|
||||
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a><span class="fu">grokfast_alpha</span><span class="kw">:</span><span class="at"> </span><span class="fl">2.0</span></span>
|
||||
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="fu">grokfast_lamb</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.98</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="fu">grokfast_lamb</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.98</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="citation-1" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="citation-1">Citation</h3>
|
||||
<div class="sourceCode" id="cb13"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">lee2024grokfast</span>,</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb13"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">lee2024grokfast</span>,</span>
|
||||
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">title</span>={{Grokfast}: Accelerated Grokking by Amplifying Slow Gradients},</span>
|
||||
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">author</span>={Lee, Jaerin and Kang, Bong Gyun and Kim, Kihoon and Lee, Kyoung Mu},</span>
|
||||
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">journal</span>={arXiv preprint arXiv:2405.20233},</span>
|
||||
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">year</span>={2024}</span>
|
||||
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/grokfast">here</a></p>
|
||||
</section>
|
||||
</section>
|
||||
@@ -841,7 +847,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<h2 class="anchored" data-anchor-id="knowledge-distillation-kd">Knowledge Distillation (KD)</h2>
|
||||
<section id="usage-2" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-2">Usage</h3>
|
||||
<div class="sourceCode" id="cb14"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb14"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="st">"axolotl.integrations.kd.KDPlugin"</span></span>
|
||||
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a><span class="fu">kd_trainer</span><span class="kw">:</span><span class="at"> </span><span class="ch">True</span></span>
|
||||
@@ -855,7 +861,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">path</span><span class="kw">:</span><span class="at"> ...</span></span>
|
||||
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> </span><span class="st">"axolotl.integrations.kd.chat_template"</span></span>
|
||||
<span id="cb14-14"><a href="#cb14-14" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">field_messages</span><span class="kw">:</span><span class="at"> </span><span class="st">"messages_combined"</span></span>
|
||||
<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">logprobs_field</span><span class="kw">:</span><span class="at"> </span><span class="st">"llm_text_generation_vllm_logprobs"</span><span class="co"> # for kd only, field of logprobs</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb14-15"><a href="#cb14-15" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">logprobs_field</span><span class="kw">:</span><span class="at"> </span><span class="st">"llm_text_generation_vllm_logprobs"</span><span class="co"> # for kd only, field of logprobs</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>An example dataset can be found at <a href="https://huggingface.co/datasets/axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample"><code>axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample</code></a></p>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/kd">here</a></p>
|
||||
</section>
|
||||
@@ -870,7 +876,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<h3 class="anchored" data-anchor-id="requirements-1">Requirements</h3>
|
||||
<ul>
|
||||
<li><p>Axolotl with <code>llmcompressor</code> extras:</p>
|
||||
<div class="sourceCode" id="cb15"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install <span class="st">"axolotl[llmcompressor]"</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div></li>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb15"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install <span class="st">"axolotl[llmcompressor]"</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div></li>
|
||||
<li><p>Requires <code>llmcompressor >= 0.5.1</code></p></li>
|
||||
</ul>
|
||||
<p>This will install all necessary dependencies to fine-tune sparsified models using the integration.</p>
|
||||
@@ -879,7 +885,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<section id="usage-3" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-3">Usage</h3>
|
||||
<p>To enable sparse fine-tuning with this integration, include the plugin in your Axolotl config:</p>
|
||||
<div class="sourceCode" id="cb16"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb16"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.llm_compressor.LLMCompressorPlugin</span></span>
|
||||
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="fu">llmcompressor</span><span class="kw">:</span></span>
|
||||
@@ -897,7 +903,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="st">'re:.*down_proj.weight'</span><span class="kw">,</span></span>
|
||||
<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">]</span></span>
|
||||
<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">start</span><span class="kw">:</span><span class="at"> </span><span class="dv">0</span></span>
|
||||
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">save_compressed</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">save_compressed</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>This plugin <strong>does not apply pruning or sparsification itself</strong> — it is intended for <strong>fine-tuning models that have already been sparsified</strong>.</p>
|
||||
<p>Pre-sparsified checkpoints can be:
|
||||
- Generated using <a href="https://github.com/vllm-project/llm-compressor">LLMCompressor</a>
|
||||
@@ -924,7 +930,7 @@ The quick brown fox jumps over the loud dog</code></pre>
|
||||
<p>After fine-tuning your sparse model, you can leverage vLLM for efficient inference.
|
||||
You can also use LLMCompressor to apply additional quantization to your fine-tuned
|
||||
sparse model before inference for even greater performance benefits.:</p>
|
||||
<div class="sourceCode" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> vllm <span class="im">import</span> LLM, SamplingParams</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> vllm <span class="im">import</span> LLM, SamplingParams</span>
|
||||
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>prompts <span class="op">=</span> [</span>
|
||||
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a> <span class="st">"Hello, my name is"</span>,</span>
|
||||
@@ -939,7 +945,7 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<span id="cb17-13"><a href="#cb17-13" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> output <span class="kw">in</span> outputs:</span>
|
||||
<span id="cb17-14"><a href="#cb17-14" aria-hidden="true" tabindex="-1"></a> prompt <span class="op">=</span> output.prompt</span>
|
||||
<span id="cb17-15"><a href="#cb17-15" aria-hidden="true" tabindex="-1"></a> generated_text <span class="op">=</span> output.outputs[<span class="dv">0</span>].text</span>
|
||||
<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"Prompt: </span><span class="sc">{</span>prompt<span class="sc">!r}</span><span class="ss">, Generated text: </span><span class="sc">{</span>generated_text<span class="sc">!r}</span><span class="ss">"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb17-16"><a href="#cb17-16" aria-hidden="true" tabindex="-1"></a> <span class="bu">print</span>(<span class="ss">f"Prompt: </span><span class="sc">{</span>prompt<span class="sc">!r}</span><span class="ss">, Generated text: </span><span class="sc">{</span>generated_text<span class="sc">!r}</span><span class="ss">"</span>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>For more details on vLLM’s capabilities and advanced configuration options, see the <a href="https://docs.vllm.ai/">official vLLM documentation</a>.</p>
|
||||
</section>
|
||||
<section id="learn-more" class="level3">
|
||||
@@ -955,7 +961,7 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<p>See https://github.com/EleutherAI/lm-evaluation-harness</p>
|
||||
<section id="usage-4" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-4">Usage</h3>
|
||||
<div class="sourceCode" id="cb18"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb18"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.lm_eval.LMEvalPlugin</span></span>
|
||||
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a><span class="fu">lm_eval_tasks</span><span class="kw">:</span></span>
|
||||
@@ -964,11 +970,11 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> arc_easy</span></span>
|
||||
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a><span class="fu">lm_eval_batch_size</span><span class="kw">:</span><span class="co"> # Batch size for evaluation</span></span>
|
||||
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="fu">output_dir</span><span class="kw">:</span><span class="co"> # Directory to save evaluation results</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a><span class="fu">output_dir</span><span class="kw">:</span><span class="co"> # Directory to save evaluation results</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="citation-2" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="citation-2">Citation</h3>
|
||||
<div class="sourceCode" id="cb19"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="va">@misc</span>{<span class="ot">eval</span>-<span class="ot">harness</span>,</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb19"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="va">@misc</span>{<span class="ot">eval</span>-<span class="ot">harness</span>,</span>
|
||||
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">author</span> = {Gao, Leo and Tow, Jonathan and Abbasi, Baber and Biderman, Stella and Black, Sid and DiPofi, Anthony and Foster, Charles and Golding, Laurence and Hsu, Jeffrey and Le Noac'h, Alain and Li, Haonan and McDonell, Kyle and Muennighoff, Niklas and Ociepa, Chris and Phang, Jason and Reynolds, Laria and Schoelkopf, Hailey and Skowron, Aviya and Sutawika, Lintang and Tang, Eric and Thite, Anish and Wang, Ben and Wang, Kevin and Zou, Andy},</span>
|
||||
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">title</span> = {A framework for few-shot language model evaluation},</span>
|
||||
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">month</span> = 07,</span>
|
||||
@@ -977,7 +983,7 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a> <span class="dt">version</span> = {v0.4.3},</span>
|
||||
<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">doi</span> = {10.5281/zenodo.12608602},</span>
|
||||
<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">url</span> = {https://zenodo.org/records/12608602}</span>
|
||||
<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/lm_eval">here</a></p>
|
||||
</section>
|
||||
</section>
|
||||
@@ -992,13 +998,13 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<p>See https://github.com/linkedin/Liger-Kernel</p>
|
||||
<section id="usage-5" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-5">Usage</h3>
|
||||
<div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb20"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.liger.LigerPlugin</span></span>
|
||||
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_rope</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_rms_norm</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_glu_activation</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_layer_norm</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_fused_linear_cross_entropy</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="fu">liger_fused_linear_cross_entropy</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="supported-models-2" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="supported-models-2">Supported Models</h3>
|
||||
@@ -1024,7 +1030,7 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
</section>
|
||||
<section id="citation-3" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="citation-3">Citation</h3>
|
||||
<div class="sourceCode" id="cb21"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">hsu2024ligerkernelefficienttriton</span>,</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb21"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="va">@article</span>{<span class="ot">hsu2024ligerkernelefficienttriton</span>,</span>
|
||||
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">title</span>={Liger Kernel: Efficient Triton Kernels for LLM Training},</span>
|
||||
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">author</span>={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},</span>
|
||||
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">year</span>={2024},</span>
|
||||
@@ -1033,7 +1039,7 @@ sparse model before inference for even greater performance benefits.:</p>
|
||||
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a> <span class="dt">primaryClass</span>={cs.LG},</span>
|
||||
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">url</span>={https://arxiv.org/abs/2410.10989},</span>
|
||||
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">journal</span>={arXiv preprint arXiv:2410.10989},</span>
|
||||
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/liger">here</a></p>
|
||||
</section>
|
||||
</section>
|
||||
@@ -1049,15 +1055,15 @@ By identifying the top n% of layers with the highest SNR, you can optimize train
|
||||
</section>
|
||||
<section id="usage-6" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="usage-6">Usage</h3>
|
||||
<div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb22"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.spectrum.SpectrumPlugin</span></span>
|
||||
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a><span class="fu">spectrum_top_fraction</span><span class="kw">:</span><span class="at"> </span><span class="fl">0.5</span></span>
|
||||
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a><span class="fu">spectrum_model_name</span><span class="kw">:</span><span class="at"> meta-llama/Meta-Llama-3.1-8B</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a><span class="fu">spectrum_model_name</span><span class="kw">:</span><span class="at"> meta-llama/Meta-Llama-3.1-8B</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="citation-4" class="level3">
|
||||
<h3 class="anchored" data-anchor-id="citation-4">Citation</h3>
|
||||
<div class="sourceCode" id="cb23"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="va">@misc</span>{<span class="ot">hartford2024spectrumtargetedtrainingsignal</span>,</span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb23"><pre class="sourceCode bib code-with-copy"><code class="sourceCode bibtex"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="va">@misc</span>{<span class="ot">hartford2024spectrumtargetedtrainingsignal</span>,</span>
|
||||
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">title</span>={Spectrum: Targeted Training on Signal to Noise Ratio},</span>
|
||||
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">author</span>={Eric Hartford and Lucas Atkins and Fernando Fernandes Neto and David Golchinfar},</span>
|
||||
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">year</span>={2024},</span>
|
||||
@@ -1065,7 +1071,7 @@ By identifying the top n% of layers with the highest SNR, you can optimize train
|
||||
<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">archivePrefix</span>={arXiv},</span>
|
||||
<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a> <span class="dt">primaryClass</span>={cs.LG},</span>
|
||||
<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">url</span>={https://arxiv.org/abs/2406.06623},</span>
|
||||
<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a>}</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>Please see reference <a href="https://github.com/axolotl-ai-cloud/axolotl/tree/main/src/axolotl/integrations/spectrum">here</a></p>
|
||||
</section>
|
||||
</section>
|
||||
@@ -1109,10 +1115,10 @@ Warning
|
||||
</div>
|
||||
<div class="callout-body-container callout-body">
|
||||
<p>If you could not load your integration, please ensure you are pip installing in editable mode.</p>
|
||||
<div class="sourceCode" id="cb24"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install <span class="at">-e</span> .</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb24"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install <span class="at">-e</span> .</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>and correctly spelled the integration name in the config file.</p>
|
||||
<div class="sourceCode" id="cb25"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.your_integration_name.YourIntegrationPlugin</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb25"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plugins</span><span class="kw">:</span></span>
|
||||
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> axolotl.integrations.your_integration_name.YourIntegrationPlugin</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="callout callout-style-default callout-note callout-titled">
|
||||
@@ -1185,13 +1191,14 @@ Note
|
||||
e.clearSelection();
|
||||
}
|
||||
const getTextToCopy = function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
const outerScaffold = trigger.parentElement.cloneNode(true);
|
||||
const codeEl = outerScaffold.querySelector('code');
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
|
||||
text: getTextToCopy
|
||||
|
||||
Reference in New Issue
Block a user