wrap prepared_ds_path in str() to avoid TypeError in fsspec package (#1548)

* wrap prepared_ds_path in str() to avoid TypeError in fsspec package

`fsspec` evaluates `if "::" in path` on `prepared_ds_path`, which raises a `TypeError` when the path is a `PosixPath` object rather than a `str`.

* update test too

---------

Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
Frank Ruis
2024-04-22 01:55:20 +02:00
committed by GitHub
parent 7d1d22f72f
commit 7477a53287
2 changed files with 2 additions and 2 deletions

View File

@@ -421,7 +421,7 @@ def load_tokenized_prepared_datasets(
if cfg.local_rank == 0:
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
- dataset.save_to_disk(prepared_ds_path)
+ dataset.save_to_disk(str(prepared_ds_path))
if cfg.push_dataset_to_hub:
LOG.info(
f"Saving merged prepared dataset with push_to_hub... {cfg.push_dataset_to_hub}/{ds_hash}"

View File

@@ -110,7 +110,7 @@ class TestDatasetPreparation(unittest.TestCase):
"""Usual use case. Verify datasets saved via `save_to_disk` can be loaded."""
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_ds_name = Path(tmp_dir) / "tmp_dataset"
- self.dataset.save_to_disk(tmp_ds_name)
+ self.dataset.save_to_disk(str(tmp_ds_name))
prepared_path = Path(tmp_dir) / "prepared"
cfg = DictDefault(