Built site for gh-pages
This commit is contained in:
@@ -363,7 +363,7 @@ Description
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="list">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1725544715074" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<tr data-index="0" data-listing-file-modified-sort="1725545504021" data-listing-reading-time-sort="1" data-listing-word-count-sort="47" data-listing-title-sort="Pre-training" data-listing-filename-sort="pretraining.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/pretraining.html" class="title listing-title">Pre-training</a>
|
||||
</td>
|
||||
@@ -371,7 +371,7 @@ Description
|
||||
<span class="listing-description">Data format for a pre-training completion task.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="1" data-listing-file-modified-sort="1725544715074" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<tr data-index="1" data-listing-file-modified-sort="1725545504021" data-listing-reading-time-sort="2" data-listing-word-count-sort="308" data-listing-title-sort="Instruction Tuning" data-listing-filename-sort="inst_tune.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/inst_tune.html" class="title listing-title">Instruction Tuning</a>
|
||||
</td>
|
||||
@@ -379,7 +379,7 @@ Description
|
||||
<span class="listing-description">Instruction tuning formats for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="2" data-listing-file-modified-sort="1725544715074" data-listing-reading-time-sort="2" data-listing-word-count-sort="254" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<tr data-index="2" data-listing-file-modified-sort="1725545504021" data-listing-reading-time-sort="2" data-listing-word-count-sort="254" data-listing-title-sort="Conversation" data-listing-filename-sort="conversation.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/conversation.html" class="title listing-title">Conversation</a>
|
||||
</td>
|
||||
@@ -387,7 +387,7 @@ Description
|
||||
<span class="listing-description">Conversation format for supervised fine-tuning.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="3" data-listing-file-modified-sort="1725544715074" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<tr data-index="3" data-listing-file-modified-sort="1725545504021" data-listing-reading-time-sort="1" data-listing-word-count-sort="3" data-listing-title-sort="Template-Free" data-listing-filename-sort="template_free.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/template_free.html" class="title listing-title">Template-Free</a>
|
||||
</td>
|
||||
@@ -395,7 +395,7 @@ Description
|
||||
<span class="listing-description">Construct prompts without a template.</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr data-index="4" data-listing-file-modified-sort="1725544715074" data-listing-reading-time-sort="1" data-listing-word-count-sort="90" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<tr data-index="4" data-listing-file-modified-sort="1725545504021" data-listing-reading-time-sort="1" data-listing-word-count-sort="92" data-listing-title-sort="Custom Pre-Tokenized Dataset" data-listing-filename-sort="tokenized.qmd">
|
||||
<td>
|
||||
<a href="../../docs/dataset-formats/tokenized.html" class="title listing-title">Custom Pre-Tokenized Dataset</a>
|
||||
</td>
|
||||
|
||||
@@ -322,7 +322,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
|
||||
<li>Pass an empty <code>type:</code> in your axolotl config.</li>
|
||||
<li>Columns in Dataset must be exactly <code>input_ids</code>, <code>attention_mask</code>, <code>labels</code></li>
|
||||
<li>To indicate that a token should be ignored during training, set its corresponding label to <code>-100</code>.</li>
|
||||
<li>Do not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.</li>
|
||||
<li>You must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.</li>
|
||||
<li>For pretraining, do not truncate/pad documents to the context window length.</li>
|
||||
<li>For instruction training, documents must be truncated/padded as desired.</li>
|
||||
</ul>
|
||||
|
||||
@@ -200,7 +200,7 @@
|
||||
"href": "docs/dataset-formats/tokenized.html",
|
||||
"title": "Custom Pre-Tokenized Dataset",
|
||||
"section": "",
|
||||
"text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nDo not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}",
|
||||
"text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nYou must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}",
|
||||
"crumbs": [
|
||||
"Dataset Formats",
|
||||
"Custom Pre-Tokenized Dataset"
|
||||
|
||||
52
sitemap.xml
52
sitemap.xml
@@ -2,106 +2,106 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.078Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/unsloth.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/amd_hpc.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/rlhf.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset_preprocessing.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/nccl.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/debugging.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/tokenized.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/template_free.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/pretraining.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/index.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.086Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.033Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/src/axolotl/integrations/LICENSE.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.090Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.037Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/FAQS.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.017Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/TODO.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/torchao.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/inst_tune.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/index.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/batch_vs_grad.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/input_output.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/config.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/mac.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://axolotl-ai-cloud.github.io/axolotl/docs/fsdp_qlora.html</loc>
|
||||
<lastmod>2024-09-05T13:58:35.074Z</lastmod>
|
||||
<lastmod>2024-09-05T14:11:44.021Z</lastmod>
|
||||
</url>
|
||||
</urlset>
|
||||
|
||||
Reference in New Issue
Block a user