Add support for revision dataset parameter

This commit is contained in:
Thomas Cleberg
2024-09-12 16:34:58 -05:00
committed by Wing Lian
parent 2fbc6b0c64
commit 68db5b1b67
3 changed files with 77 additions and 1 deletion

View File

@@ -90,6 +90,7 @@ datasets:
shards: # Optional[int] number of shards to split data into
name: # Optional[str] name of dataset configuration to load
train_on_split: train # Optional[str] name of dataset split to load from
revision: # Optional[str] Revision of the dataset to load from the Hugging Face Hub: a commit hash, tag, or branch name. If not specified, the head of the repository's default branch (normally `main`) is used. This parameter is ignored for local datasets.
# Optional[str] fastchat conversation type, only used with type: sharegpt
conversation: # Options (see Conversation 'name'): https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py