Compare commits

...

1 Commit

Author SHA1 Message Date
Wing Lian
f6721baf10 tweak to make it work when we have no explicit test split
Some checks failed
pre-commit / pre-commit (push) Has been cancelled
PyTest / test (3.10) (push) Has been cancelled
PyTest / test (3.9) (push) Has been cancelled
2023-07-11 22:40:21 -04:00

View File

@@ -405,13 +405,14 @@ def load_prepare_datasets(
private=True, private=True,
) )
else: else:
dataset_train = load_tokenized_prepared_datasets( # dataset_train = load_tokenized_prepared_datasets(
dataset = load_tokenized_prepared_datasets(
"train", tokenizer, cfg, default_dataset_prepared_path "train", tokenizer, cfg, default_dataset_prepared_path
) )
dataset_test = load_tokenized_prepared_datasets( # dataset_test = load_tokenized_prepared_datasets(
"test", tokenizer, cfg, default_dataset_prepared_path # "test", tokenizer, cfg, default_dataset_prepared_path
) # )
dataset = DatasetDict({"train": dataset_train, "test": dataset_test}) # dataset = DatasetDict({"train": dataset_train, "test": dataset_test})
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None: if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
logging.info( logging.info(