fix: revert local dir dataset load (#878)

This commit is contained in:
NanoCode012
2023-11-18 22:50:41 +09:00
committed by GitHub
parent ddf815022a
commit 575a082aae

View File

@@ -242,7 +242,14 @@ def load_tokenized_prepared_datasets(
 local_path = Path(config_dataset.path)
 if local_path.exists():
     if local_path.is_dir():
-        ds = load_from_disk(config_dataset.path)
+        # TODO dirs with arrow or parquet files could be loaded with `load_from_disk`
+        ds = load_dataset(
+            config_dataset.path,
+            name=config_dataset.name,
+            data_files=config_dataset.data_files,
+            streaming=False,
+            split=None,
+        )
     elif local_path.is_file():
         ds_type = get_ds_type(config_dataset)