make sure to use train split if loading from hf
This commit is contained in:
@@ -58,6 +58,7 @@ def load_tokenized_prepared_datasets(tokenizer, cfg, default_dataset_prepared_pa
|
|||||||
try:
|
try:
|
||||||
if cfg.push_dataset_to_hub:
|
if cfg.push_dataset_to_hub:
|
||||||
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
||||||
|
dataset = dataset["train"]
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -232,6 +233,7 @@ def load_prepare_datasets(tokenizer: PreTrainedTokenizerBase, cfg, default_datas
|
|||||||
f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
||||||
)
|
)
|
||||||
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True)
|
||||||
|
dataset = dataset["train"]
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user