prepared dataset caching, other misc fixes (#665)

* prepared dataset caching, other misc fixes
* also don't load from disk cache unless explicit
2023-10-02 21:07:24 -04:00
parent f4868d733c
commit e50a64e85e
32 changed files with 35 additions and 34 deletions
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -12,7 +12,7 @@ push_dataset_to_hub:
 datasets:
  - path: teknium/GPT4-LLM-Cleaned
    type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
 val_set_size: 0.01
 adapter: lora
 lora_model_dir:
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -18,7 +18,7 @@ datasets:
    data_files:
      - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
    type: "alpaca:chat"
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
 val_set_size: 0.01
 # enable QLoRA
 adapter: qlora
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -12,7 +12,7 @@ push_dataset_to_hub:
 datasets:
  - path: teknium/GPT4-LLM-Cleaned
    type: alpaca:chat
-dataset_prepared_path: last_run_prepared
+dataset_prepared_path:
 val_set_size: 0.01
 adapter:
 lora_model_dir: