wrap prepared_ds_path in str() to avoid TypeError in fsspec package (#1548)
* wrap prepared_ds_path in str() to avoid TypeError in fsspec package `fsspec` calls `if "::" in path` on `prepared_ds_path`, which will throw an error if it is a `PosixPath` object. * update test too --------- Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
@@ -421,7 +421,7 @@ def load_tokenized_prepared_datasets(
|
||||
|
||||
if cfg.local_rank == 0:
|
||||
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
|
||||
dataset.save_to_disk(prepared_ds_path)
|
||||
dataset.save_to_disk(str(prepared_ds_path))
|
||||
if cfg.push_dataset_to_hub:
|
||||
LOG.info(
|
||||
f"Saving merged prepared dataset with push_to_hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
||||
|
||||
@@ -110,7 +110,7 @@ class TestDatasetPreparation(unittest.TestCase):
|
||||
"""Usual use case. Verify datasets saved via `save_to_disk` can be loaded."""
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
tmp_ds_name = Path(tmp_dir) / "tmp_dataset"
|
||||
self.dataset.save_to_disk(tmp_ds_name)
|
||||
self.dataset.save_to_disk(str(tmp_ds_name))
|
||||
|
||||
prepared_path = Path(tmp_dir) / "prepared"
|
||||
cfg = DictDefault(
|
||||
|
||||
Reference in New Issue
Block a user