Built site for gh-pages
This commit is contained in:
@@ -70,10 +70,10 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="../site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-2f5df379a58b258e96c21c0638c20c03.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<link href="../site_libs/quarto-html/quarto-syntax-highlighting-dark-b53751a350365c71b6c909e95f209ed1.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="../site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="../site_libs/bootstrap/bootstrap-141b2cdb37a94fcfd6825c1581ff795f.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
|
||||
<link href="../site_libs/bootstrap/bootstrap-0cda210ced8960466d2ee7bf22d15016.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="dark">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "navbar",
|
||||
"copy-button": false,
|
||||
@@ -113,8 +113,8 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<nav class="navbar navbar-expand " data-bs-theme="dark">
|
||||
<div class="navbar-container container-fluid">
|
||||
<div class="navbar-brand-container mx-auto">
|
||||
<a class="navbar-brand" href="../index.html">
|
||||
<span class="navbar-title">Axolotl</span>
|
||||
<a href="../index.html" class="navbar-brand navbar-brand-logo">
|
||||
<img src="../image/axolotl_logo_digital_white.svg" alt="" class="navbar-logo">
|
||||
</a>
|
||||
</div>
|
||||
<div class="quarto-navbar-tools tools-wide tools-end">
|
||||
@@ -130,7 +130,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<i class="bi bi-layout-text-sidebar-reverse"></i>
|
||||
</button>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/multi-node.html">Multi Node</a></li></ol></nav>
|
||||
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multi-gpu.html">Deployments</a></li><li class="breadcrumb-item"><a href="../docs/multi-node.html">Multi Node</a></li></ol></nav>
|
||||
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
</a>
|
||||
</div>
|
||||
@@ -151,7 +151,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">How-To Guides</span></a>
|
||||
<span class="menu-text">Getting Started</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-1" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -160,91 +160,25 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/getting-started.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Getting Started with Axolotl</span></a>
|
||||
<span class="menu-text">Quickstart</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/installation.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Installation Guide</span></a>
|
||||
<span class="menu-text">Installation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
<a href="../docs/cli.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">CLI Reference</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/inference.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Inference Guide</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/input_output.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Template-free prompt construction</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi-GPU Training Guide</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Unsloth</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Training with AMD GPUs on HPC Systems</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Ray Train integration</span></a>
|
||||
<span class="menu-text">Inference</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -299,7 +233,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<span class="menu-text">Deployments</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
@@ -307,18 +241,187 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-gpu.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multi-GPU</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multi-node.html" class="sidebar-item-text sidebar-link active">
|
||||
<span class="menu-text">Multi Node</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/ray-integration.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Ray Train</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/amd_hpc.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">AMD GPUs on HPC Systems</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/mac.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Mac M-series</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">How To Guides</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-4" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-4" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multimodal.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">MultiModal / Vision Language Models (BETA)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/rlhf.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">RLHF (Beta)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/reward_modelling.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Reward Modelling</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/lr_groups.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Learning Rate Groups</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/lora_optims.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">LoRA Optimizations</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Core Concepts</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-5" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-5" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/batch_vs_grad.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Batch size vs Gradient accumulation</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/dataset_preprocessing.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Dataset Preprocessing</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/multipack.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Multipack (Sample Packing)</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Advanced Features</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-6" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-6" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/fsdp_qlora.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FDSP + QLoRA</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/unsloth.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Unsloth</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/torchao.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">PyTorch ao</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/custom_integrations.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Custom Integrations</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Troubleshooting</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-7" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-7" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/debugging.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Debugging</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/nccl.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">NCCL</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item sidebar-item-section">
|
||||
<div class="sidebar-item-container">
|
||||
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true">
|
||||
<span class="menu-text">Reference</span></a>
|
||||
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-8" role="navigation" aria-expanded="true" aria-label="Toggle section">
|
||||
<i class="bi bi-chevron-right ms-2"></i>
|
||||
</a>
|
||||
</div>
|
||||
<ul id="quarto-sidebar-section-8" class="collapse list-unstyled sidebar-section depth1 show">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/config.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">Config options</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="../docs/faq.html" class="sidebar-item-text sidebar-link">
|
||||
<span class="menu-text">FAQ</span></a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
@@ -338,7 +441,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/getting-started.html">How-To Guides</a></li><li class="breadcrumb-item"><a href="../docs/multi-node.html">Multi Node</a></li></ol></nav>
|
||||
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../docs/multi-gpu.html">Deployments</a></li><li class="breadcrumb-item"><a href="../docs/multi-node.html">Multi Node</a></li></ol></nav>
|
||||
<div class="quarto-title">
|
||||
<h1 class="title">Multi Node</h1>
|
||||
</div>
|
||||
@@ -378,8 +481,8 @@ Important
|
||||
<p>Make sure the main machine is reachable by other machines.</p>
|
||||
</div>
|
||||
</div>
|
||||
<section id="accelerate" class="level1">
|
||||
<h1>Accelerate</h1>
|
||||
<section id="accelerate" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="accelerate">Accelerate</h2>
|
||||
<p>You will need to create a configuration for accelerate, either by using <code>accelerate config</code> and follow the instructions or you can use one of the preset below:</p>
|
||||
<p>~/.cache/huggingface/accelerate/default_config.yaml</p>
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">compute_environment</span><span class="kw">:</span><span class="at"> LOCAL_MACHINE</span></span>
|
||||
@@ -409,17 +512,17 @@ Important
|
||||
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">fsdp_transformer_layer_cls_to_wrap</span><span class="kw">:</span><span class="at"> LlamaDecoderLayer</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>All you have to do now is launch using accelerate as you would usually do on each machine and voila, the processes will start once you have launched accelerate on every machine.</p>
|
||||
</section>
|
||||
<section id="raytrain" class="level1">
|
||||
<h1>Raytrain</h1>
|
||||
<section id="raytrain" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="raytrain">Raytrain</h2>
|
||||
<p>Please see ray train doc <a href="../docs/ray-integration.html">here</a>.</p>
|
||||
</section>
|
||||
<section id="torchrun" class="level1">
|
||||
<h1>Torchrun</h1>
|
||||
<section id="torchrun" class="level2">
|
||||
<h2 class="anchored" data-anchor-id="torchrun">Torchrun</h2>
|
||||
<p>If you are using Infiniband, we recommend torchrun to utilize the full bandwidth.</p>
|
||||
<p>Set the following env (change buffersize/socketname depending on your system):</p>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="at">export NCCL_IB_DISABLE=0</span></span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="at">export NCCL_SOCKET_IFNAME="eth0,en,eth,em,bond"</span></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="at">export NCCL_BUFFSIZE=2097152</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="bu">export</span> <span class="va">NCCL_IB_DISABLE</span><span class="op">=</span>0</span>
|
||||
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="bu">export</span> <span class="va">NCCL_SOCKET_IFNAME</span><span class="op">=</span><span class="st">"eth0,en,eth,em,bond"</span></span>
|
||||
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a><span class="bu">export</span> <span class="va">NCCL_BUFFSIZE</span><span class="op">=</span>2097152</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>Run the following on each node:</p>
|
||||
<div class="sourceCode" id="cb4"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="ex">torchrun</span> <span class="at">--nnodes</span> <span class="va">$num_nodes</span> <span class="at">--nproc_per_node</span> <span class="va">$gpu_per_node</span> <span class="at">--rdzv_id</span> <span class="va">$rdzv_id</span> <span class="at">--rdzv_backend</span> c10d <span class="at">--rdzv_endpoint</span> <span class="st">"</span><span class="va">$head_node_ip</span><span class="st">:</span><span class="va">$head_node_port</span><span class="st">"</span> <span class="at">-m</span> axolotl.cli.train config.yaml</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>Please make sure to substitute the placeholder variables.</p>
|
||||
|
||||
Reference in New Issue
Block a user