prepared dataset caching, other misc fixes (#665)

* prepared dataset caching, other misc fixes

* also don't load from disk cache unless explicit
This commit is contained in:
Wing Lian
2023-10-02 21:07:24 -04:00
committed by GitHub
parent f4868d733c
commit e50a64e85e
32 changed files with 35 additions and 34 deletions

View File

@@ -12,7 +12,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter: lora
lora_model_dir:

View File

@@ -18,7 +18,7 @@ datasets:
data_files:
- Chain-of-Thought/formatted_cot_data/gsm8k_train.json
type: "alpaca:chat"
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
# enable QLoRA
adapter: qlora

View File

@@ -12,7 +12,7 @@ push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
val_set_size: 0.01
adapter:
lora_model_dir: