Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2025-01-13 15:46:05 +00:00
parent 34697fc683
commit 1b26b6a2f3
5 changed files with 44 additions and 37 deletions

View File

@@ -1 +1 @@
aa0c628c
5173f1f8

View File

@@ -363,7 +363,7 @@ Description
</tr>
</thead>
<tbody class="list">
<tr data-index="0" data-listing-file-modified-sort="1736783064835" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<tr data-index="0" data-listing-file-modified-sort="1736783098940" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
<td>
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
</td>
@@ -371,7 +371,7 @@ Description
<span class="listing-description">Data format for a pre-training completion task.</span>
</td>
</tr>
<tr data-index="1" data-listing-file-modified-sort="1736783064835" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<tr data-index="1" data-listing-file-modified-sort="1736783098940" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
<td>
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
</td>
@@ -379,7 +379,7 @@ Description
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="2" data-listing-file-modified-sort="1736783064835" data-listing-reading-time-sort="4" data-listing-word-count-sort="625" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<tr data-index="2" data-listing-file-modified-sort="1736783098940" data-listing-reading-time-sort="4" data-listing-word-count-sort="625" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
<td>
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
</td>
@@ -387,7 +387,7 @@ Description
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
</td>
</tr>
<tr data-index="3" data-listing-file-modified-sort="1736783064835" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<tr data-index="3" data-listing-file-modified-sort="1736783098940" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
<td>
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
</td>
@@ -395,7 +395,7 @@ Description
<span class="listing-description">Construct prompts without a template.</span>
</td>
</tr>
<tr data-index="4" data-listing-file-modified-sort="1736783064835" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<tr data-index="4" data-listing-file-modified-sort="1736783098940" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
<td>
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
</td>

View File

@@ -342,8 +342,15 @@ Streaming is recommended for large datasets
<div class="code-with-filename-file">
<pre><strong>config.yaml</strong></pre>
</div>
<div class="sourceCode" id="cb2" data-filename="config.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span><span class="co"> # hf path only</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="co">...</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode" id="cb2" data-filename="config.yaml"><pre class="sourceCode yaml code-with-copy"><code class="sourceCode yaml"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pretraining_dataset</span><span class="kw">:</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="kw">-</span><span class="at"> </span><span class="fu">name</span><span class="kw">:</span></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">path</span><span class="kw">:</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">split</span><span class="kw">:</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">text_column</span><span class="kw">:</span><span class="co"> # column in dataset with the data, usually `text`</span></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">type</span><span class="kw">:</span><span class="at"> pretrain</span></span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">trust_remote_code</span><span class="kw">:</span></span>
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a><span class="at"> </span><span class="fu">skip</span><span class="kw">:</span><span class="co"> # number of rows of data to skip over from the beginning</span></span>
<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a><span class="co">...</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</div>
</div>

View File

@@ -629,7 +629,7 @@
"href": "docs/dataset-formats/pretraining.html",
"title": "Pre-training",
"section": "",
"text": "For pretraining, there is no prompt template or roles. The only required field is text:\n\n\ndata.jsonl\n\n{\"text\": \"first row\"}\n{\"text\": \"second row\"}\n...\n\n\n\n\n\n\n\nStreaming is recommended for large datasets\n\n\n\nAxolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:\n\n\nconfig.yaml\n\npretraining_dataset: # hf path only\n...",
"text": "For pretraining, there is no prompt template or roles. The only required field is text:\n\n\ndata.jsonl\n\n{\"text\": \"first row\"}\n{\"text\": \"second row\"}\n...\n\n\n\n\n\n\n\nStreaming is recommended for large datasets\n\n\n\nAxolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:\n\n\nconfig.yaml\n\npretraining_dataset:\n - name:\n path:\n split:\n text_column: # column in dataset with the data, usually `text`\n type: pretrain\n trust_remote_code:\n skip: # number of rows of data to skip over from the beginning\n...",
"crumbs": [
"Dataset Formats",
"Pre-training"

View File

@@ -2,114 +2,114 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/index.html</loc>
<lastmod>2025-01-13T15:44:24.847Z</lastmod>
<lastmod>2025-01-13T15:44:58.956Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/src/axolotl/integrations/LICENSE.html</loc>
<lastmod>2025-01-13T15:44:24.851Z</lastmod>
<lastmod>2025-01-13T15:44:58.960Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/nccl.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/input_output.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset_preprocessing.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/torchao.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/rlhf.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/config.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/template_free.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/tokenized.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/mac.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/FAQS.html</loc>
<lastmod>2025-01-13T15:44:24.831Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/TODO.html</loc>
<lastmod>2025-01-13T15:44:24.831Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/debugging.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/inst_tune.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/pretraining.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/index.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/unsloth.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/multimodal.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/batch_vs_grad.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/fsdp_qlora.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/amd_hpc.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.940Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html</loc>
<lastmod>2025-01-13T15:44:24.835Z</lastmod>
<lastmod>2025-01-13T15:44:58.944Z</lastmod>
</url>
<url>
<loc>https://axolotl-ai-cloud.github.io/axolotl/src/axolotl/integrations/cut_cross_entropy/ACKNOWLEDGEMENTS.html</loc>
<lastmod>2025-01-13T15:44:24.851Z</lastmod>
<lastmod>2025-01-13T15:44:58.960Z</lastmod>
</url>
</urlset>