From 607a4d33f28862b1d4c23a5524f97c85f5b44d62 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 21 May 2023 22:04:39 -0400 Subject: [PATCH] make sure to use train split if loading from hf --- src/axolotl/utils/data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index f095cc9ab..c974d6730 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -58,6 +58,7 @@ def load_tokenized_prepared_datasets(tokenizer, cfg, default_dataset_prepared_pa try: if cfg.push_dataset_to_hub: dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True) + dataset = dataset["train"] except: pass @@ -232,6 +233,7 @@ def load_prepare_datasets(tokenizer: PreTrainedTokenizerBase, cfg, default_datas f"checkking for packed prepared dataset from hub... {cfg.push_dataset_to_hub}/{ds_hash}" ) dataset = load_dataset(f"{cfg.push_dataset_to_hub}/{ds_hash}", use_auth_token=True) + dataset = dataset["train"] except: pass