From 575a082aae3c38762aa66680d9b4657db8b397c4 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Sat, 18 Nov 2023 22:50:41 +0900 Subject: [PATCH] fix: revert local dir dataset load (#878) --- src/axolotl/utils/data.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index 49b36202c..5c41d16fe 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -242,7 +242,14 @@ def load_tokenized_prepared_datasets( local_path = Path(config_dataset.path) if local_path.exists(): if local_path.is_dir(): - ds = load_from_disk(config_dataset.path) + # TODO dirs with arrow or parquet files could be loaded with `load_from_disk` + ds = load_dataset( + config_dataset.path, + name=config_dataset.name, + data_files=config_dataset.data_files, + streaming=False, + split=None, + ) elif local_path.is_file(): ds_type = get_ds_type(config_dataset)