Support loading data files from a local directory
ref: https://huggingface.co/docs/datasets/v2.13.0/en/package_reference/loading_methods#datasets.load_dataset.path
This commit is contained in:
@@ -102,13 +102,26 @@ def load_tokenized_prepared_datasets(
|
||||
pass
|
||||
|
||||
# prefer local dataset, even if hub exists
|
||||
if Path(d.path).exists():
|
||||
ds = load_dataset(
|
||||
"json",
|
||||
data_files=d.path,
|
||||
streaming=False,
|
||||
split=None,
|
||||
)
|
||||
local_path = Path(d.path)
|
||||
if local_path.exists():
|
||||
if local_path.is_dir():
|
||||
ds = load_dataset(
|
||||
d.path,
|
||||
data_files=d.data_files,
|
||||
streaming=False,
|
||||
split=None,
|
||||
)
|
||||
elif local_path.is_file():
|
||||
ds = load_dataset(
|
||||
"json",
|
||||
data_files=d.path,
|
||||
streaming=False,
|
||||
split=None,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"unhandled dataset load: local path exists, but is neither a directory or a file"
|
||||
)
|
||||
elif ds_from_hub:
|
||||
if d.data_files:
|
||||
ds = load_dataset(
|
||||
|
||||
Reference in New Issue
Block a user