Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2024-04-18 17:21:10 +00:00
parent 9397b5376f
commit fed83311f8
6 changed files with 83 additions and 72 deletions

View File

@@ -1 +1 @@
17b2baf7
1cb32d4f

View File

@@ -714,36 +714,37 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<span id="cb1-406"><a href="#cb1-406" aria-hidden="true" tabindex="-1"></a><span class="co"> # bos_token: "&lt;s&gt;"</span></span>
<span id="cb1-407"><a href="#cb1-407" aria-hidden="true" tabindex="-1"></a><span class="co"> # eos_token: "&lt;/s&gt;"</span></span>
<span id="cb1-408"><a href="#cb1-408" aria-hidden="true" tabindex="-1"></a><span class="co"> # unk_token: "&lt;unk&gt;"</span></span>
<span id="cb1-409"><a href="#cb1-409" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-410"><a href="#cb1-410" aria-hidden="true" tabindex="-1"></a><span class="co"># Add extra tokens.</span></span>
<span id="cb1-411"><a href="#cb1-411" aria-hidden="true" tabindex="-1"></a><span class="fu">tokens</span><span class="kw">:</span></span>
<span id="cb1-412"><a href="#cb1-412" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-413"><a href="#cb1-413" aria-hidden="true" tabindex="-1"></a><span class="co"># FSDP</span></span>
<span id="cb1-414"><a href="#cb1-414" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
<span id="cb1-415"><a href="#cb1-415" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb1-416"><a href="#cb1-416" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-417"><a href="#cb1-417" aria-hidden="true" tabindex="-1"></a><span class="co"># Deepspeed config path. e.g., deepspeed_configs/zero3.json</span></span>
<span id="cb1-418"><a href="#cb1-418" aria-hidden="true" tabindex="-1"></a><span class="fu">deepspeed</span><span class="kw">:</span></span>
<span id="cb1-419"><a href="#cb1-419" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-420"><a href="#cb1-420" aria-hidden="true" tabindex="-1"></a><span class="co"># Advanced DDP Arguments</span></span>
<span id="cb1-421"><a href="#cb1-421" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_timeout</span><span class="kw">:</span></span>
<span id="cb1-422"><a href="#cb1-422" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_bucket_cap_mb</span><span class="kw">:</span></span>
<span id="cb1-423"><a href="#cb1-423" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_broadcast_buffers</span><span class="kw">:</span></span>
<span id="cb1-424"><a href="#cb1-424" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-425"><a href="#cb1-425" aria-hidden="true" tabindex="-1"></a><span class="co"># Path to torch distx for optim 'adamw_anyprecision'</span></span>
<span id="cb1-426"><a href="#cb1-426" aria-hidden="true" tabindex="-1"></a><span class="fu">torchdistx_path</span><span class="kw">:</span></span>
<span id="cb1-427"><a href="#cb1-427" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-428"><a href="#cb1-428" aria-hidden="true" tabindex="-1"></a><span class="co"># Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize</span></span>
<span id="cb1-429"><a href="#cb1-429" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span></span>
<span id="cb1-430"><a href="#cb1-430" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-431"><a href="#cb1-431" aria-hidden="true" tabindex="-1"></a><span class="co"># Debug mode</span></span>
<span id="cb1-432"><a href="#cb1-432" aria-hidden="true" tabindex="-1"></a><span class="fu">debug</span><span class="kw">:</span></span>
<span id="cb1-433"><a href="#cb1-433" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-434"><a href="#cb1-434" aria-hidden="true" tabindex="-1"></a><span class="co"># Seed</span></span>
<span id="cb1-435"><a href="#cb1-435" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span></span>
<span id="cb1-436"><a href="#cb1-436" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-437"><a href="#cb1-437" aria-hidden="true" tabindex="-1"></a><span class="co"># Allow overwrite yml config using from cli</span></span>
<span id="cb1-438"><a href="#cb1-438" aria-hidden="true" tabindex="-1"></a><span class="fu">strict</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<span id="cb1-409"><a href="#cb1-409" aria-hidden="true" tabindex="-1"></a><span class="co"> # pad_token: "[PAD]"</span></span>
<span id="cb1-410"><a href="#cb1-410" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-411"><a href="#cb1-411" aria-hidden="true" tabindex="-1"></a><span class="co"># Add extra tokens.</span></span>
<span id="cb1-412"><a href="#cb1-412" aria-hidden="true" tabindex="-1"></a><span class="fu">tokens</span><span class="kw">:</span></span>
<span id="cb1-413"><a href="#cb1-413" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-414"><a href="#cb1-414" aria-hidden="true" tabindex="-1"></a><span class="co"># FSDP</span></span>
<span id="cb1-415"><a href="#cb1-415" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp</span><span class="kw">:</span></span>
<span id="cb1-416"><a href="#cb1-416" aria-hidden="true" tabindex="-1"></a><span class="fu">fsdp_config</span><span class="kw">:</span></span>
<span id="cb1-417"><a href="#cb1-417" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-418"><a href="#cb1-418" aria-hidden="true" tabindex="-1"></a><span class="co"># Deepspeed config path. e.g., deepspeed_configs/zero3.json</span></span>
<span id="cb1-419"><a href="#cb1-419" aria-hidden="true" tabindex="-1"></a><span class="fu">deepspeed</span><span class="kw">:</span></span>
<span id="cb1-420"><a href="#cb1-420" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-421"><a href="#cb1-421" aria-hidden="true" tabindex="-1"></a><span class="co"># Advanced DDP Arguments</span></span>
<span id="cb1-422"><a href="#cb1-422" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_timeout</span><span class="kw">:</span></span>
<span id="cb1-423"><a href="#cb1-423" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_bucket_cap_mb</span><span class="kw">:</span></span>
<span id="cb1-424"><a href="#cb1-424" aria-hidden="true" tabindex="-1"></a><span class="fu">ddp_broadcast_buffers</span><span class="kw">:</span></span>
<span id="cb1-425"><a href="#cb1-425" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-426"><a href="#cb1-426" aria-hidden="true" tabindex="-1"></a><span class="co"># Path to torch distx for optim 'adamw_anyprecision'</span></span>
<span id="cb1-427"><a href="#cb1-427" aria-hidden="true" tabindex="-1"></a><span class="fu">torchdistx_path</span><span class="kw">:</span></span>
<span id="cb1-428"><a href="#cb1-428" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-429"><a href="#cb1-429" aria-hidden="true" tabindex="-1"></a><span class="co"># Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize</span></span>
<span id="cb1-430"><a href="#cb1-430" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span></span>
<span id="cb1-431"><a href="#cb1-431" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-432"><a href="#cb1-432" aria-hidden="true" tabindex="-1"></a><span class="co"># Debug mode</span></span>
<span id="cb1-433"><a href="#cb1-433" aria-hidden="true" tabindex="-1"></a><span class="fu">debug</span><span class="kw">:</span></span>
<span id="cb1-434"><a href="#cb1-434" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-435"><a href="#cb1-435" aria-hidden="true" tabindex="-1"></a><span class="co"># Seed</span></span>
<span id="cb1-436"><a href="#cb1-436" aria-hidden="true" tabindex="-1"></a><span class="fu">seed</span><span class="kw">:</span></span>
<span id="cb1-437"><a href="#cb1-437" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-438"><a href="#cb1-438" aria-hidden="true" tabindex="-1"></a><span class="co"># Allow overwrite yml config using from cli</span></span>
<span id="cb1-439"><a href="#cb1-439" aria-hidden="true" tabindex="-1"></a><span class="fu">strict</span><span class="kw">:</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>

View File

@@ -351,7 +351,7 @@ Description
</tr>
</thead>
<tbody class="list">
<tr data-index="0" data-listing-file-modified-sort="1713311159149" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<tr data-index="0" data-listing-file-modified-sort="1713460832370" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<td>
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
</td>
@@ -359,7 +359,7 @@ Description
<span class="listing-description">Data format for a pre-training completion task.</span>
</td>
</tr>
<tr data-index="1" data-listing-file-modified-sort="1713311159149" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<tr data-index="1" data-listing-file-modified-sort="1713460832370" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<td>
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
</td>
@@ -367,7 +367,7 @@ Description
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="2" data-listing-file-modified-sort="1713311159149" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<tr data-index="2" data-listing-file-modified-sort="1713460832370" data-listing-reading-time-sort="2" data-listing-word-count-sort="235" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<td>
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
</td>
@@ -375,7 +375,7 @@ Description
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="3" data-listing-file-modified-sort="1713311159149" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<tr data-index="3" data-listing-file-modified-sort="1713460832370" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<td>
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
</td>
@@ -383,7 +383,7 @@ Description
<span class="listing-description">Construct prompts without a template.</span>
</td>
</tr>
<tr data-index="4" data-listing-file-modified-sort="1713311159149" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<tr data-index="4" data-listing-file-modified-sort="1713460832370" data-listing-reading-time-sort="1" data-listing-word-count-sort="22" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<td>
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
</td>

View File

@@ -410,6 +410,16 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
</tr>
<tr class="even">
<td>Mixtral8X22</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td></td>
</tr>
<tr class="odd">
<td>Pythia</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -419,7 +429,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="odd">
<tr class="even">
<td>cerebras</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -429,7 +439,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="even">
<tr class="odd">
<td>btlm</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -439,7 +449,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="odd">
<tr class="even">
<td>mpt</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -449,7 +459,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="even">
<tr class="odd">
<td>falcon</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -459,7 +469,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="odd">
<tr class="even">
<td>gpt-j</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -469,7 +479,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="even">
<tr class="odd">
<td>XGen</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -479,7 +489,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="odd">
<tr class="even">
<td>phi</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -489,7 +499,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="even">
<tr class="odd">
<td>RWKV</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -499,7 +509,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="odd">
<tr class="even">
<td>Qwen</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -509,7 +519,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
<td></td>
<td></td>
</tr>
<tr class="even">
<tr class="odd">
<td>Gemma</td>
<td style="text-align: left;"></td>
<td style="text-align: left;"></td>
@@ -777,7 +787,7 @@ cd skypilot/llm/axolotl</code></pre>
<p>Deepspeed is an optimization suite for multi-gpu systems allowing you to train much larger models than you might typically be able to fit into your GPUs VRAM. More information about the various optimization types for deepspeed is available at https://huggingface.co/docs/accelerate/main/en/usage_guides/deepspeed#what-is-integrated</p>
<p>We provide several default deepspeed JSON configurations for ZeRO stage 1, 2, and 3.</p>
<div class="sourceCode" id="cb21"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">deepspeed</span><span class="kw">:</span><span class="at"> deepspeed_configs/zero1.json</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<pre class="shell"><code>accelerate launch -m axolotl.cli.train examples/llama-2/config.py --deepspeed deepspeed_configs/zero1.json</code></pre>
<pre class="shell"><code>accelerate launch -m axolotl.cli.train examples/llama-2/config.yml --deepspeed deepspeed_configs/zero1.json</code></pre>
</section>
<section id="fsdp" class="level5">
<h5 class="anchored" data-anchor-id="fsdp">FSDP</h5>

File diff suppressed because one or more lines are too long

View File

@@ -2,90 +2,90 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/index.html</loc>
<lastmod>2024-04-16T23:45:59.161Z</lastmod>
<lastmod>2024-04-18T17:20:32.382Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/TODO.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.366Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multi-node.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/rlhf.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/nccl.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/multipack.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/tokenized.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/inst_tune.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/conversation.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/batch_vs_grad.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/input_output.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/faq.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset_preprocessing.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/template_free.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/pretraining.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/dataset-formats/index.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/mac.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/config.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/debugging.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/docs/fsdp_qlora.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html</loc>
<lastmod>2024-04-16T23:45:59.153Z</lastmod>
<lastmod>2024-04-18T17:20:32.370Z</lastmod>
</url>
<url>
<loc>https://OpenAccess-AI-Collective.github.io/axolotl/FAQS.html</loc>
<lastmod>2024-04-16T23:45:59.149Z</lastmod>
<lastmod>2024-04-18T17:20:32.366Z</lastmod>
</url>
</urlset>