Tweak dataset loading so it works when there is no explicit test split
This commit is contained in:
@@ -405,13 +405,14 @@ def load_prepare_datasets(
|
|||||||
private=True,
|
private=True,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
dataset_train = load_tokenized_prepared_datasets(
|
# dataset_train = load_tokenized_prepared_datasets(
|
||||||
|
dataset = load_tokenized_prepared_datasets(
|
||||||
"train", tokenizer, cfg, default_dataset_prepared_path
|
"train", tokenizer, cfg, default_dataset_prepared_path
|
||||||
)
|
)
|
||||||
dataset_test = load_tokenized_prepared_datasets(
|
# dataset_test = load_tokenized_prepared_datasets(
|
||||||
"test", tokenizer, cfg, default_dataset_prepared_path
|
# "test", tokenizer, cfg, default_dataset_prepared_path
|
||||||
)
|
# )
|
||||||
dataset = DatasetDict({"train": dataset_train, "test": dataset_test})
|
# dataset = DatasetDict({"train": dataset_train, "test": dataset_test})
|
||||||
|
|
||||||
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|
||||||
logging.info(
|
logging.info(
|
||||||
|
|||||||
Reference in New Issue
Block a user