Built site for gh-pages
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.7.34">
|
||||
<meta name="generator" content="quarto-1.8.24">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
@@ -67,14 +67,15 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<link href="../favicon.jpg" rel="icon" type="image/jpeg">
|
||||
<script src="../site_libs/quarto-html/quarto.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/axe/axe-check.js" type="module"></script>
|
||||
<script src="../site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-befe23ebd2f54d8af2c8a89d1a1611f1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b651517ce65839d647a86e2780455cfb.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../site_libs/bootstrap/bootstrap-e9895ec3143e9833a687747e8d39d226.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<link href="../site_libs/bootstrap/bootstrap-f9d679a32da2b248d4ca48a0e58e089e.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
@@ -124,7 +125,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a href="../index.html" class="navbar-brand navbar-brand-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo light-content">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo dark-content">
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
@@ -150,6 +152,10 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article page-navbar">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation docked overflow-auto">
|
||||
<div class="pt-lg-2 mt-2 text-left sidebar-header">
|
||||
<a href="../index.html" class="sidebar-logo-link">
|
||||
</a>
|
||||
</div>
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
@@ -563,18 +569,18 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
|
||||
<section id="sec-deepspeed-config" class="level3" data-number="2.1">
|
||||
<h3 data-number="2.1" class="anchored" data-anchor-id="sec-deepspeed-config"><span class="header-section-number">2.1</span> Configuration</h3>
|
||||
<p>Add to your YAML config:</p>
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">deepspeed</span><span class="kw">:</span><span class="at"> deepspeed_configs/zero1.json</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">deepspeed</span><span class="kw">:</span><span class="at"> deepspeed_configs/zero1.json</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="sec-deepspeed-usage" class="level3" data-number="2.2">
|
||||
<h3 data-number="2.2" class="anchored" data-anchor-id="sec-deepspeed-usage"><span class="header-section-number">2.2</span> Usage</h3>
|
||||
<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Fetch deepspeed configs (if not already present)</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Fetch deepspeed configs (if not already present)</span></span>
|
||||
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> fetch deepspeed_configs</span>
|
||||
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Passing arg via config</span></span>
|
||||
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> train config.yml</span>
|
||||
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Passing arg via cli</span></span>
|
||||
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> train config.yml <span class="at">--deepspeed</span> deepspeed_configs/zero1.json</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a><span class="ex">axolotl</span> train config.yml <span class="at">--deepspeed</span> deepspeed_configs/zero1.json</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
<section id="sec-zero-stages" class="level3" data-number="2.3">
|
||||
<h3 data-number="2.3" class="anchored" data-anchor-id="sec-zero-stages"><span class="header-section-number">2.3</span> ZeRO Stages</h3>
|
||||
@@ -669,23 +675,23 @@ also follow the config field mapping below to update field names.</p>
|
||||
</table>
|
||||
<p>For more details, please see the migration guide in the <a href="https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md">torchtitan repo</a>. In Axolotl,
|
||||
if you were using the following FSDP1 config:</p>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb3"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">1</span></span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
|
||||
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
|
||||
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_sharding_strategy</span><span class="kw">:</span><span class="at"> FULL_SHARD</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
<p>You can migrate to the following FSDP2 config:</p>
|
||||
<div class="sourceCode" id="cb4"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb4"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_version</span><span class="kw">:</span><span class="at"> </span><span class="dv">2</span></span>
|
||||
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
|
||||
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">false</span></span>
|
||||
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">cpu_ram_efficient_loading</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">auto_wrap_policy</span><span class="kw">:</span><span class="at"> TRANSFORMER_BASED_WRAP</span></span>
|
||||
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> Qwen3DecoderLayer</span></span>
|
||||
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reshard_after_forward</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">reshard_after_forward</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="sec-fsdp-config" class="level3" data-number="3.2">
|
||||
@@ -703,13 +709,13 @@ Note
|
||||
<p>Using <code>fsdp</code> to configure FSDP is deprecated and will be removed in an upcoming release of Axolotl. Please use <code>fsdp_config</code> as above instead.</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sourceCode" id="cb5"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb5"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
|
||||
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> full_shard</span></span>
|
||||
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> auto_wrap</span></span>
|
||||
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
|
||||
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_offload_params</span><span class="kw">:</span><span class="at"> </span><span class="ch">true</span></span>
|
||||
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_state_dict_type</span><span class="kw">:</span><span class="at"> FULL_STATE_DICT</span></span>
|
||||
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> LlamaDecoderLayer</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> LlamaDecoderLayer</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="sec-sequence-parallelism" class="level2" data-number="4">
|
||||
@@ -740,7 +746,7 @@ single sequence causes OOM errors during model training.</p>
|
||||
<section id="sec-common-problems" class="level3" data-number="6.2">
|
||||
<h3 data-number="6.2" class="anchored" data-anchor-id="sec-common-problems"><span class="header-section-number">6.2</span> Common Problems</h3>
|
||||
<div class="tabset-margin-container"></div><div class="panel-tabset">
|
||||
<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-1-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-1" role="tab" aria-controls="tabset-1-1" aria-selected="true" aria-current="page">Memory Issues</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-1-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-2" role="tab" aria-controls="tabset-1-2" aria-selected="false">Training Instability</a></li></ul>
|
||||
<ul class="nav nav-tabs" role="tablist"><li class="nav-item" role="presentation"><a class="nav-link active" id="tabset-1-1-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-1" role="tab" aria-controls="tabset-1-1" aria-selected="true" href="">Memory Issues</a></li><li class="nav-item" role="presentation"><a class="nav-link" id="tabset-1-2-tab" data-bs-toggle="tab" data-bs-target="#tabset-1-2" role="tab" aria-controls="tabset-1-2" aria-selected="false" href="">Training Instability</a></li></ul>
|
||||
<div class="tab-content">
|
||||
<div id="tabset-1-1" class="tab-pane active" role="tabpanel" aria-labelledby="tabset-1-1-tab">
|
||||
<ul>
|
||||
@@ -819,13 +825,14 @@ single sequence causes OOM errors during model training.</p>
|
||||
e.clearSelection();
|
||||
}
|
||||
const getTextToCopy = function(trigger) {
|
||||
const codeEl = trigger.previousElementSibling.cloneNode(true);
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
const outerScaffold = trigger.parentElement.cloneNode(true);
|
||||
const codeEl = outerScaffold.querySelector('code');
|
||||
for (const childEl of codeEl.children) {
|
||||
if (isCodeAnnotation(childEl)) {
|
||||
childEl.remove();
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
return codeEl.innerText;
|
||||
}
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
|
||||
text: getTextToCopy
|
||||
@@ -1218,7 +1225,7 @@ single sequence causes OOM errors during model training.</p>
|
||||
}
|
||||
});
|
||||
</script><div class="modal fade" id="quarto-embedded-source-code-modal" tabindex="-1" aria-labelledby="quarto-embedded-source-code-modal-label" aria-hidden="true"><div class="modal-dialog modal-dialog-scrollable"><div class="modal-content"><div class="modal-header"><h5 class="modal-title" id="quarto-embedded-source-code-modal-label">Source Code</h5><button class="btn-close" data-bs-dismiss="modal"></button></div><div class="modal-body"><div class="">
|
||||
<div class="sourceCode" id="cb6" data-shortcodes="false"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
|
||||
<div class="code-copy-outer-scaffold"><div class="sourceCode" id="cb6" data-shortcodes="false"><pre class="sourceCode markdown code-with-copy"><code class="sourceCode markdown"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co">---</span></span>
|
||||
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="an">title:</span><span class="co"> "Multi-GPU"</span></span>
|
||||
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="an">format:</span></span>
|
||||
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="co"> html:</span></span>
|
||||
@@ -1399,7 +1406,7 @@ single sequence causes OOM errors during model training.</p>
|
||||
<span id="cb6-179"><a href="#cb6-179" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb6-180"><a href="#cb6-180" aria-hidden="true" tabindex="-1"></a>:::</span>
|
||||
<span id="cb6-181"><a href="#cb6-181" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb6-182"><a href="#cb6-182" aria-hidden="true" tabindex="-1"></a>For more detailed troubleshooting, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></pre></div>
|
||||
<span id="cb6-182"><a href="#cb6-182" aria-hidden="true" tabindex="-1"></a>For more detailed troubleshooting, see our <span class="co">[</span><span class="ot">debugging guide</span><span class="co">](debugging.qmd)</span>.</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button" data-in-quarto-modal=""><i class="bi"></i></button></div>
|
||||
</div></div></div></div></div>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
Reference in New Issue
Block a user