diff --git a/docker/Dockerfile-cloud b/docker/Dockerfile-cloud
index 69ce143bb..cc8c58415 100644
--- a/docker/Dockerfile-cloud
+++ b/docker/Dockerfile-cloud
@@ -21,7 +21,10 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
     printf "\n[[ -z \"\$TMUX\"  ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
     printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
     chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
-    chmod +x /root/cloud-entrypoint.sh
+    chmod +x /root/cloud-entrypoint.sh && \
+    mkdir -p /workspace/data/axolotl-artifacts && \
+    # outputs/ is tracked in the repo (outputs/.gitignore), so a real directory may already exist; drop it first, otherwise ln nests the link inside it
+    rm -rf /workspace/axolotl/outputs && ln -sfn /workspace/data/axolotl-artifacts /workspace/axolotl/outputs
 
 ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]
diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 18dd86e6b..ba4e65daa 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -38,7 +38,7 @@ wandb_watch:
 wandb_name:
 wandb_log_model:
 
-output_dir: btlm-out
+output_dir: ./outputs/btlm-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index c4f44326c..285607a4c 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 batch_size: 4
 micro_batch_size: 4
 num_epochs: 2
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index ce5a892d0..0ba96cfaa 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index d822e6847..787862d01 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index dfef2538b..92d4c544a 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 77f821e1c..93a6de877 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 3e6c7fe62..d13f50532 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index e817b113c..a1026a982 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
index 9adbe0004..fc3b76194 100644
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -84,7 +84,7 @@
         "    type: alpaca\n",
         "dataset_prepared_path:\n",
         "val_set_size: 0.05\n",
-        "output_dir: ./qlora-out\n",
+        "output_dir: ./outputs/qlora-out\n",
         "\n",
         "adapter: qlora\n",
         "lora_model_dir:\n",
diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml
index e5e3ea921..32b625ac6 100644
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml
index 89e24db05..50ee0a016 100644
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml
index 68292707a..60dc201ee 100644
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 512
 sample_packing: false
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 5be9c6425..029ca40e0 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index eb1cdfcdb..4e34144ed 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -42,7 +42,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 1dd46a93f..36264f063 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml
index 619a40129..e08facfc5 100644
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -12,7 +12,7 @@ datasets:
   - path: mhenrichsen/alpaca_2k_test
     type: alpaca
 val_set_size: 0.1
-output_dir: ./out
+output_dir: ./outputs/out
 
 adapter: qlora
 lora_r: 32
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index cd3f2e2ad..f801729fa 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 2
 micro_batch_size: 2
 num_epochs: 2
diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml
index 41a3854fe..3d6f69e79 100644
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
index ef04fb53f..43a76c00b 100644
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index a672c7b94..088629c08 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -21,7 +21,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./jeopardy-bot-7b
+output_dir: ./outputs/jeopardy-bot-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index 74edc95e6..3d94b04b8 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 68ca9ed31..2a706265b 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -33,7 +33,7 @@ wandb_project:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index e692c7ac1..7012d1f61 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./lisa-out
+output_dir: ./outputs/lisa-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index 4529a912d..68d9ac014 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index a7793dce4..95bfae692 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 93b3b2a60..88029f92d 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 834dbfb33..dda32170b 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 9fd19953c..93247ce06 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./relora-out
+output_dir: ./outputs/relora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 8c9ba90bf..a36fd740e 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index d60f8a303..6b0ebaed8 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index 8d8785bfd..9b74f6b4d 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out/qlora-llama3-70b
+output_dir: ./outputs/out/qlora-llama3-70b
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index 9cedee8ee..44120d938 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml
index 0a5223bca..f88f5138d 100644
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: false
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index cc0a44b2a..e993e44a7 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -23,7 +23,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 2048
 sample_packing: true
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index c909c63e2..a70937c4f 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8192
 sample_packing: true
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index 31b0d527e..03c74bb59 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 eval_sample_packing: false
 
 adapter: lora
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index ac9ac0dd9..0d5dc9edd 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 adapter: lora
 lora_model_dir:
diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml
index 71ac1e701..e6b07c594 100644
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index 7727fd748..2549ef018 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -16,7 +16,7 @@ datasets:
     type: chat_template.argilla
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./mistral-qlora-orpo-out
+output_dir: ./outputs/mistral-qlora-orpo-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
index ac80a2a75..fe68b2817 100644
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml
index b6a07ae51..c09597040 100644
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index 5ee3da9d6..13fbe92ab 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 ## You can optionally freeze the entire model and unfreeze a subset of parameters
 unfrozen_parameters:
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 9abb6f407..9a1e86386 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -21,7 +21,7 @@ model_config:
 datasets:
   - path: yahma/alpaca-cleaned
     type: alpaca
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 8000
 sample_packing: true
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 6fbbb9618..c7bdb155c 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 45e31266f..530415de1 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./mpt-alpaca-7b
+output_dir: ./outputs/mpt-alpaca-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index 0a404c79d..a0473213c 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./openllama-out
+output_dir: ./outputs/openllama-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index b83b2db4e..2b6784915 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -31,7 +31,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 3d6218b30..8d4dc05ca 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index b21386f70..0dabadc7a 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -12,7 +12,7 @@ datasets:
 
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index d2b5d661c..7c181a3c1 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -12,7 +12,7 @@ datasets:
 
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 7a2d05d01..27815550b 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -12,7 +12,7 @@ datasets:
 
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out
 
 sequence_len: 2048
 sample_packing: true
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index e44bba745..18e6beaaf 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -26,7 +26,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./pythia-12b
+output_dir: ./outputs/pythia-12b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 5
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 7cb07fe25..0aa650f67 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -20,7 +20,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-alpaca-pythia
+output_dir: ./outputs/lora-alpaca-pythia
 gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 4
diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml
index da4d784e0..dd8dc1e4f 100644
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml
index 501a866b2..01c0c0ab8 100644
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml
index c59b282d0..452335e38 100644
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml
index d6a835a0a..bc11007c7 100644
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 5a42e2a95..ff395a863 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -24,7 +24,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./redpajama-alpaca-3b
+output_dir: ./outputs/redpajama-alpaca-3b
 batch_size: 4
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index bdfe1bd85..9fee099d4 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-replit
+output_dir: ./outputs/lora-replit
 batch_size: 8
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml
index f3fc16f86..777262a7e 100644
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
index c5051fab6..c65b9e4cd 100644
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
index 1efdfbc8e..83fc0d89f 100644
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -11,7 +11,7 @@ datasets:
 
 dataset_prepared_path:
 val_set_size: 0.2
-output_dir: ./qlora
+output_dir: ./outputs/qlora
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml
index fd7b02cac..c08be82d3 100644
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml
index 4a16f14b9..c5ff0437e 100644
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml
index 3b68a7f54..e501dcb8e 100644
--- a/examples/tiny-llama/pretrain.yml
+++ b/examples/tiny-llama/pretrain.yml
@@ -14,7 +14,7 @@ pretraining_dataset:
   type: pretrain
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 
 sequence_len: 2048
 sample_packing: true
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
index 3ea313c83..0d21aca9d 100644
--- a/examples/tiny-llama/qlora.yml
+++ b/examples/tiny-llama/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 adapter: qlora
 lora_model_dir:
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index e3faa01bd..7e3f83cbd 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -40,7 +40,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 
 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml
index dc8c37d18..7fe322d63 100644
--- a/examples/yi-34B-chat/qlora.yml
+++ b/examples/yi-34B-chat/qlora.yml
@@ -33,7 +33,7 @@ eval_sample_packing: false
 eval_batch_size: 1
 
 # LoRA
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 adapter: qlora
 lora_model_dir:
 lora_r: 32
diff --git a/outputs/.gitignore b/outputs/.gitignore
new file mode 100644
index 000000000..72e8ffc0d
--- /dev/null
+++ b/outputs/.gitignore
@@ -0,0 +1 @@
+*