From 097ec6570f77bd1bc5a89f8c281acf811c1891ac Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Thu, 5 Sep 2024 14:12:25 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- docs/dataset-formats/index.html | 10 +++--- docs/dataset-formats/tokenized.html | 2 +- search.json | 2 +- sitemap.xml | 52 ++++++++++++++--------------- 5 files changed, 34 insertions(+), 34 deletions(-) diff --git a/.nojekyll b/.nojekyll index bfad79f7f..60d0fb28a 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -b9c1f079 \ No newline at end of file +e3ca216d \ No newline at end of file diff --git a/docs/dataset-formats/index.html b/docs/dataset-formats/index.html index 1799d16d4..99e558b86 100644 --- a/docs/dataset-formats/index.html +++ b/docs/dataset-formats/index.html @@ -363,7 +363,7 @@ Description - + Pre-training @@ -371,7 +371,7 @@ Description Data format for a pre-training completion task. - + Instruction Tuning @@ -379,7 +379,7 @@ Description Instruction tuning formats for supervised fine-tuning. - + Conversation @@ -387,7 +387,7 @@ Description Conversation format for supervised fine-tuning. - + Template-Free @@ -395,7 +395,7 @@ Description Construct prompts without a template. - + Custom Pre-Tokenized Dataset diff --git a/docs/dataset-formats/tokenized.html b/docs/dataset-formats/tokenized.html index 1b524eba4..51a971767 100644 --- a/docs/dataset-formats/tokenized.html +++ b/docs/dataset-formats/tokenized.html @@ -322,7 +322,7 @@ pre > code.sourceCode > span > a:first-child::before { text-decoration: underlin
  • Pass an empty type: in your axolotl config.
  • Columns in Dataset must be exactly input_ids, attention_mask, labels
  • To indicate that a token should be ignored during training, set its corresponding label to -100.
  • -
  • Do not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.
  • +
  • You must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.
  • For pretraining, do not truncate/pad documents to the context window length.
  • For instruction training, documents must be truncated/padded as desired.
  • diff --git a/search.json b/search.json index d8fb315ba..85b5177b6 100644 --- a/search.json +++ b/search.json @@ -200,7 +200,7 @@ "href": "docs/dataset-formats/tokenized.html", "title": "Custom Pre-Tokenized Dataset", "section": "", - "text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nDo not add BOS/EOS. Axolotl will add them for you based on the default tokenizer for the model you’re using.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}", + "text": "Pass an empty type: in your axolotl config.\nColumns in Dataset must be exactly input_ids, attention_mask, labels\nTo indicate that a token should be ignored during training, set its corresponding label to -100.\nYou must add BOS and EOS, and make sure that you are training on EOS by not setting its label to -100.\nFor pretraining, do not truncate/pad documents to the context window length.\nFor instruction training, documents must be truncated/padded as desired.\n\nSample config:\n\n\nconfig.yml\n\ndatasets:\n - path: /path/to/your/file.jsonl\n ds_type: json\n type:\n\nSample jsonl:\n{\"input_ids\":[271,299,99],\"attention_mask\":[1,1,1],\"labels\":[271,-100,99]}\n{\"input_ids\":[87,227,8383,12],\"attention_mask\":[1,1,1,1],\"labels\":[87,227,8383,12]}", "crumbs": [ "Dataset Formats", "Custom Pre-Tokenized Dataset" diff --git a/sitemap.xml b/sitemap.xml index dce3bc74d..14b1baea0 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,106 +2,106 @@ https://axolotl-ai-cloud.github.io/axolotl/examples/colab-notebooks/colab-axolotl-example.html - 2024-09-05T13:58:35.078Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/unsloth.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/amd_hpc.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/rlhf.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset_preprocessing.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/nccl.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/debugging.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/tokenized.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/template_free.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/pretraining.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/index.html - 2024-09-05T13:58:35.086Z + 2024-09-05T14:11:44.033Z https://axolotl-ai-cloud.github.io/axolotl/src/axolotl/integrations/LICENSE.html - 2024-09-05T13:58:35.090Z + 2024-09-05T14:11:44.037Z https://axolotl-ai-cloud.github.io/axolotl/FAQS.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.017Z https://axolotl-ai-cloud.github.io/axolotl/TODO.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/torchao.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/inst_tune.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/index.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/dataset-formats/conversation.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/batch_vs_grad.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/input_output.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/config.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/mac.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z https://axolotl-ai-cloud.github.io/axolotl/docs/fsdp_qlora.html - 2024-09-05T13:58:35.074Z + 2024-09-05T14:11:44.021Z