support custom field for completion from yml (#580)

* support custom field for completion from yml

* remove legacy completion check and add doc

* update README docs
This commit is contained in:
Wing Lian
2023-09-15 07:48:21 -04:00
committed by GitHub
parent 1aa400721e
commit f7a22632d7
5 changed files with 53 additions and 12 deletions

View File

@@ -322,6 +322,7 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
- path: EleutherAI/pile
name: enron_emails
type: completion # format from earlier
field: text # Optional[str] default: text, field to use for completion data
# huggingface repo with multiple named configurations/subsets
datasets:
@@ -444,6 +445,9 @@ datasets:
# 'no_input_format' cannot include {input}
no_input_format: "{instruction} "
# for completion datasets, the field to use for completion data when it is not `text`
field:
# axolotl attempts to save the dataset as an arrow after packing the data together so
# subsequent training attempts load faster, relative path
dataset_prepared_path: data/last_run_prepared