support for datasets with multiple names (#480)

* support for datasets with multiple names

* update docs
This commit is contained in:
Wing Lian
2023-08-29 06:18:17 -07:00
committed by GitHub
parent e356b297cb
commit 5ac3392075
2 changed files with 19 additions and 1 deletions

View File

@@ -328,6 +328,15 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
name: enron_emails
type: completion # format from earlier
# huggingface repo with multiple named configurations/subsets;
# `name` is given as a list, one entry per configuration/subset
datasets:
  - path: bigcode/commitpackft
    name:
      - ruby
      - python
      - typescript
    type: ...  # unimplemented custom format
# local
datasets:
- path: data.jsonl # or json