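Support loading data files from a local directory: when the dataset path is a local directory, it is passed to `load_dataset` together with `data_files`; a single local file is still loaded as JSON, and any other kind of existing path raises a `ValueError`.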
ref: https://huggingface.co/docs/datasets/v2.13.0/en/package_reference/loading_methods#datasets.load_dataset.path
This commit is contained in:
@@ -102,13 +102,26 @@ def load_tokenized_prepared_datasets(
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
# prefer local dataset, even if hub exists
|
# prefer local dataset, even if hub exists
|
||||||
if Path(d.path).exists():
|
local_path = Path(d.path)
|
||||||
ds = load_dataset(
|
if local_path.exists():
|
||||||
"json",
|
if local_path.is_dir():
|
||||||
data_files=d.path,
|
ds = load_dataset(
|
||||||
streaming=False,
|
d.path,
|
||||||
split=None,
|
data_files=d.data_files,
|
||||||
)
|
streaming=False,
|
||||||
|
split=None,
|
||||||
|
)
|
||||||
|
elif local_path.is_file():
|
||||||
|
ds = load_dataset(
|
||||||
|
"json",
|
||||||
|
data_files=d.path,
|
||||||
|
streaming=False,
|
||||||
|
split=None,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"unhandled dataset load: local path exists, but is neither a directory or a file"
|
||||||
|
)
|
||||||
elif ds_from_hub:
|
elif ds_from_hub:
|
||||||
if d.data_files:
|
if d.data_files:
|
||||||
ds = load_dataset(
|
ds = load_dataset(
|
||||||
|
|||||||
Reference in New Issue
Block a user