Only use `revision` on HF Hub-backed datasets

This commit is contained in:
Wing Lian
2024-09-14 13:03:36 -04:00
parent 68db5b1b67
commit d0d22b7812

View File

@@ -320,7 +320,6 @@ def load_tokenized_prepared_datasets(
data_files=config_dataset.data_files, data_files=config_dataset.data_files,
streaming=False, streaming=False,
split=None, split=None,
revision=config_dataset.revision,
) )
else: else:
ds = load_from_disk(config_dataset.path) ds = load_from_disk(config_dataset.path)
@@ -333,7 +332,6 @@ def load_tokenized_prepared_datasets(
data_files=config_dataset.path, data_files=config_dataset.path,
streaming=False, streaming=False,
split=None, split=None,
revision=config_dataset.revision,
) )
else: else:
raise ValueError( raise ValueError(
@@ -367,7 +365,6 @@ def load_tokenized_prepared_datasets(
streaming=False, streaming=False,
split=None, split=None,
storage_options=storage_options, storage_options=storage_options,
revision=config_dataset.revision,
) )
elif config_dataset.path.startswith("https://"): elif config_dataset.path.startswith("https://"):
ds_type = get_ds_type(config_dataset) ds_type = get_ds_type(config_dataset)
@@ -378,7 +375,6 @@ def load_tokenized_prepared_datasets(
streaming=False, streaming=False,
split=None, split=None,
storage_options=storage_options, storage_options=storage_options,
revision=config_dataset.revision,
) )
else: else:
if isinstance(config_dataset.data_files, str): if isinstance(config_dataset.data_files, str):