diff --git a/.nojekyll b/.nojekyll
index bfad79f7f..60d0fb28a 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-b9c1f079
\ No newline at end of file
+e3ca216d
\ No newline at end of file
diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html
index 1799d16d4..99e558b86 100644
--- a/docs/dataset-formats/index.html
+++ b/docs/dataset-formats/index.html
@@ -363,7 +363,7 @@ Description
-
+
|
Pre-training
|
@@ -371,7 +371,7 @@ Description
Data format for a pre-training completion task.
-
+
|
Instruction Tuning
|
@@ -379,7 +379,7 @@ Description
Instruction tuning formats for supervised fine-tuning.
-
+
|
Conversation
|
@@ -387,7 +387,7 @@ Description
Conversation format for supervised fine-tuning.
-
+
|
Template-Free
|
@@ -395,7 +395,7 @@ Description
Construct prompts without a template.
-
+
|
Custom Pre-Tokenized Dataset
|
diff --git a/docs/dataset-formats/tokenized.html b/docs/dataset-formats/tokenized.html
index 1b524eba4..51a971767 100644
--- a/docs/dataset-formats/tokenized.html
+++ b/docs/dataset-formats/tokenized.html
@@ -322,7 +322,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
Pass an empty type: in your axolotl config.
Columns in Dataset must be exactly input_ids, attention_mask, labels
To indicate that a token should be ignored during training, set its corresponding label to -100.
-Do not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.
+You must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.
For pretraining, do not truncate/pad documents to the context window length.
For instruction training, documents must be truncated/padded as desired.
diff --git a/search.json b/search.json
index d8fb315ba..85b5177b6 100644
--- a/search.json
+++ b/search.json
@@ -200,7 +200,7 @@
"href": "docs/dataset-formats/tokenized.html",
"title": "Custom Pre-Tokenized Dataset",
"section": "",
- "text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nDo not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}",
+ "text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nYou must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}",
"crumbs": [
"Dataset Formats",
"Custom Pre-Tokenized Dataset"
diff --git a/sitemap.xml b/sitemap.xml
index dce3bc74d..14b1baea0 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,106 +2,106 @@
https://axolotl-ai-cloud.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html
- 2024-09-05T13:58:35.078Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/unsloth.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/amd_hpc.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/rlhf.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset_preprocessing.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/nccl.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/debugging.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/tokenized.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/template_free.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/pretraining.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/index.html
- 2024-09-05T13:58:35.086Z
+ 2024-09-05T14:11:44.033Z
https://axolotl-ai-cloud.github.io/axolotl/src/axolotl/integrations/LICENSE.html
- 2024-09-05T13:58:35.090Z
+ 2024-09-05T14:11:44.037Z
https://axolotl-ai-cloud.github.io/axolotl/FAQS.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.017Z
https://axolotl-ai-cloud.github.io/axolotl/TODO.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/torchao.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/inst_tune.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/index.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/batch_vs_grad.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/input_output.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/config.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/mac.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z
https://axolotl-ai-cloud.github.io/axolotl/docs/fsdp_qlora.html
- 2024-09-05T13:58:35.074Z
+ 2024-09-05T14:11:44.021Z