diff --git a/docker/Dockerfile-cloud b/docker/Dockerfile-cloud index 69ce143bb..cc8c58415 100644 --- a/docker/Dockerfile-cloud +++ b/docker/Dockerfile-cloud @@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \ printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \ printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \ chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \ - chmod +x /root/cloud-entrypoint.sh + chmod +x /root/cloud-entrypoint.sh && \ + mkdir -p /workspace/data/axolotl-artifacts && \ + ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs ENTRYPOINT ["/root/cloud-entrypoint.sh"] CMD ["sleep", "infinity"] diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml index 18dd86e6b..ba4e65daa 100644 --- a/examples/cerebras/btlm-ft.yml +++ b/examples/cerebras/btlm-ft.yml @@ -38,7 +38,7 @@ wandb_watch: wandb_name: wandb_log_model: -output_dir: btlm-out +output_dir: ./outputs/btlm-out gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 1 diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml index c4f44326c..285607a4c 100644 --- a/examples/cerebras/qlora.yml +++ b/examples/cerebras/qlora.yml @@ -25,7 +25,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out batch_size: 4 micro_batch_size: 4 num_epochs: 2 diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml index ce5a892d0..0ba96cfaa 100644 --- a/examples/code-llama/13b/lora.yml +++ b/examples/code-llama/13b/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml index d822e6847..787862d01 100644 --- a/examples/code-llama/13b/qlora.yml +++ b/examples/code-llama/13b/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml index dfef2538b..92d4c544a 100644 --- a/examples/code-llama/34b/lora.yml +++ b/examples/code-llama/34b/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml index 77f821e1c..93a6de877 100644 --- a/examples/code-llama/34b/qlora.yml +++ b/examples/code-llama/34b/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml index 3e6c7fe62..d13f50532 100644 --- a/examples/code-llama/7b/lora.yml +++ b/examples/code-llama/7b/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml index e817b113c..a1026a982 100644 --- a/examples/code-llama/7b/qlora.yml +++ b/examples/code-llama/7b/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index 9adbe0004..fc3b76194 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -84,7 +84,7 @@ " type: alpaca\n", "dataset_prepared_path:\n", "val_set_size: 0.05\n", - "output_dir: ./qlora-out\n", + "output_dir: ./outputs/qlora-out\n", "\n", "adapter: qlora\n", "lora_model_dir:\n", diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml index e5e3ea921..32b625ac6 100644 --- a/examples/dbrx/16bit-lora.yaml +++ b/examples/dbrx/16bit-lora.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 512 sample_packing: false diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml index 89e24db05..50ee0a016 100644 --- a/examples/dbrx/8bit-lora.yaml +++ b/examples/dbrx/8bit-lora.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 512 sample_packing: false diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml index 68292707a..60dc201ee 100644 --- a/examples/dbrx/fft-ds-zero3.yaml +++ b/examples/dbrx/fft-ds-zero3.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 512 sample_packing: false diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml index 5be9c6425..029ca40e0 100644 --- a/examples/falcon/config-7b-lora.yml +++ b/examples/falcon/config-7b-lora.yml @@ -28,7 +28,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./falcon-7b +output_dir: ./outputs/falcon-7b batch_size: 2 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml index eb1cdfcdb..4e34144ed 100644 --- a/examples/falcon/config-7b-qlora.yml +++ b/examples/falcon/config-7b-qlora.yml @@ -42,7 +42,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out # QLoRA paper Table 9 # - 16 for 7b & 13b diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml index 1dd46a93f..36264f063 100644 --- a/examples/falcon/config-7b.yml +++ b/examples/falcon/config-7b.yml @@ -28,7 +28,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./falcon-7b +output_dir: ./outputs/falcon-7b batch_size: 2 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml index 619a40129..e08facfc5 100644 --- a/examples/gemma/qlora.yml +++ b/examples/gemma/qlora.yml @@ -12,7 +12,7 @@ datasets: - path: mhenrichsen/alpaca_2k_test type: alpaca val_set_size: 0.1 -output_dir: ./out +output_dir: ./outputs/out adapter: qlora lora_r: 32 diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml index cd3f2e2ad..f801729fa 100644 --- a/examples/gptj/qlora.yml +++ b/examples/gptj/qlora.yml @@ -23,7 +23,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out gradient_accumulation_steps: 2 micro_batch_size: 2 num_epochs: 2 diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml index 41a3854fe..3d6f69e79 100644 --- a/examples/jamba/qlora.yaml +++ b/examples/jamba/qlora.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 4096 sample_packing: false diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml index ef04fb53f..43a76c00b 100644 --- a/examples/jamba/qlora_deepspeed.yaml +++ b/examples/jamba/qlora_deepspeed.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 4096 sample_packing: false diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml index a672c7b94..088629c08 100644 --- a/examples/jeopardy-bot/config.yml +++ b/examples/jeopardy-bot/config.yml @@ -21,7 +21,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./jeopardy-bot-7b +output_dir: ./outputs/jeopardy-bot-7b gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml index 74edc95e6..3d94b04b8 100644 --- a/examples/llama-2/fft_optimized.yml +++ b/examples/llama-2/fft_optimized.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 4096 sample_packing: true diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml index 68ca9ed31..2a706265b 100644 --- a/examples/llama-2/gptq-lora.yml +++ b/examples/llama-2/gptq-lora.yml @@ -33,7 +33,7 @@ wandb_project: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./model-out +output_dir: ./outputs/model-out gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml index e692c7ac1..7012d1f61 100644 --- a/examples/llama-2/lisa.yml +++ b/examples/llama-2/lisa.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./lisa-out +output_dir: ./outputs/lisa-out sequence_len: 4096 sample_packing: true diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml index 4529a912d..68d9ac014 100644 --- a/examples/llama-2/loftq.yml +++ b/examples/llama-2/loftq.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml index a7793dce4..95bfae692 100644 --- a/examples/llama-2/lora.yml +++ b/examples/llama-2/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml index 93b3b2a60..88029f92d 100644 --- a/examples/llama-2/qlora-fsdp.yml +++ b/examples/llama-2/qlora-fsdp.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml index 834dbfb33..dda32170b 100644 --- a/examples/llama-2/qlora.yml +++ b/examples/llama-2/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml index 9fd19953c..93247ce06 100644 --- a/examples/llama-2/relora.yml +++ b/examples/llama-2/relora.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./relora-out +output_dir: ./outputs/relora-out adapter: qlora lora_model_dir: diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml index 8c9ba90bf..a36fd740e 100644 --- a/examples/llama-3/fft-8b.yaml +++ b/examples/llama-3/fft-8b.yaml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 8192 sample_packing: true diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml index d60f8a303..6b0ebaed8 100644 --- a/examples/llama-3/lora-8b.yml +++ b/examples/llama-3/lora-8b.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml index 8d8785bfd..9b74f6b4d 100644 --- a/examples/llama-3/qlora-fsdp-70b.yaml +++ b/examples/llama-3/qlora-fsdp-70b.yaml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./out/qlora-llama3-70b +output_dir: ./outputs/out/qlora-llama3-70b adapter: qlora lora_model_dir: diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml index 9cedee8ee..44120d938 100644 --- a/examples/llama-3/qlora.yml +++ b/examples/llama-3/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml index 0a5223bca..f88f5138d 100644 --- a/examples/mamba/config.yml +++ b/examples/mamba/config.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.0 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 2048 sample_packing: false diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml index cc0a44b2a..e993e44a7 100644 --- a/examples/mistral/bigstral-ds-zero3.yaml +++ b/examples/mistral/bigstral-ds-zero3.yaml @@ -23,7 +23,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 2048 sample_packing: true diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml index c909c63e2..a70937c4f 100644 --- a/examples/mistral/config.yml +++ b/examples/mistral/config.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 8192 sample_packing: true diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml index 31b0d527e..03c74bb59 100644 --- a/examples/mistral/lora-mps.yml +++ b/examples/mistral/lora-mps.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0 -output_dir: ./lora-out +output_dir: ./outputs/lora-out eval_sample_packing: false adapter: lora diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml index ac9ac0dd9..0d5dc9edd 100644 --- a/examples/mistral/lora.yml +++ b/examples/mistral/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.1 -output_dir: ./lora-out +output_dir: ./outputs/lora-out adapter: lora lora_model_dir: diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml index 71ac1e701..e6b07c594 100644 --- a/examples/mistral/mistral-qlora-fsdp.yml +++ b/examples/mistral/mistral-qlora-fsdp.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.02 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out model_config: output_router_logits: true diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml index 7727fd748..2549ef018 100644 --- a/examples/mistral/mistral-qlora-orpo.yml +++ b/examples/mistral/mistral-qlora-orpo.yml @@ -16,7 +16,7 @@ datasets: type: chat_template.argilla dataset_prepared_path: last_run_prepared val_set_size: 0.1 -output_dir: ./mistral-qlora-orpo-out +output_dir: ./outputs/mistral-qlora-orpo-out adapter: qlora lora_model_dir: diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml index ac80a2a75..fe68b2817 100644 --- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml +++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.02 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out model_config: output_router_logits: true diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml index b6a07ae51..c09597040 100644 --- a/examples/mistral/mixtral-qlora-fsdp.yml +++ b/examples/mistral/mixtral-qlora-fsdp.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.02 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out model_config: output_router_logits: true diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml index 5ee3da9d6..13fbe92ab 100644 --- a/examples/mistral/mixtral.yml +++ b/examples/mistral/mixtral.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.0 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out ## You can optionally freeze the entire model and unfreeze a subset of parameters unfrozen_parameters: diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml index 9abb6f407..9a1e86386 100644 --- a/examples/mistral/mixtral_22.yml +++ b/examples/mistral/mixtral_22.yml @@ -21,7 +21,7 @@ model_config: datasets: - path: yahma/alpaca-cleaned type: alpaca -output_dir: ./out +output_dir: ./outputs/out sequence_len: 8000 sample_packing: true diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml index 6fbbb9618..c7bdb155c 100644 --- a/examples/mistral/qlora.yml +++ b/examples/mistral/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.1 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml index 45e31266f..530415de1 100644 --- a/examples/mpt-7b/config.yml +++ b/examples/mpt-7b/config.yml @@ -23,7 +23,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./mpt-alpaca-7b +output_dir: ./outputs/mpt-alpaca-7b gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml index 0a404c79d..a0473213c 100644 --- a/examples/openllama-3b/config.yml +++ b/examples/openllama-3b/config.yml @@ -25,7 +25,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./openllama-out +output_dir: ./outputs/openllama-out gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml index b83b2db4e..2b6784915 100644 --- a/examples/openllama-3b/lora.yml +++ b/examples/openllama-3b/lora.yml @@ -31,7 +31,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./lora-out +output_dir: ./outputs/lora-out gradient_accumulation_steps: 1 micro_batch_size: 2 num_epochs: 4 diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml index 3d6218b30..8d4dc05ca 100644 --- a/examples/openllama-3b/qlora.yml +++ b/examples/openllama-3b/qlora.yml @@ -25,7 +25,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out gradient_accumulation_steps: 1 micro_batch_size: 2 num_epochs: 4 diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml index b21386f70..0dabadc7a 100644 --- a/examples/phi/phi-ft.yml +++ b/examples/phi/phi-ft.yml @@ -12,7 +12,7 @@ datasets: dataset_prepared_path: val_set_size: 0.05 -output_dir: ./phi-sft-out +output_dir: ./outputs/phi-sft-out sequence_len: 2048 sample_packing: true diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml index d2b5d661c..7c181a3c1 100644 --- a/examples/phi/phi-qlora.yml +++ b/examples/phi/phi-qlora.yml @@ -12,7 +12,7 @@ datasets: dataset_prepared_path: val_set_size: 0.05 -output_dir: ./phi-sft-out +output_dir: ./outputs/phi-sft-out sequence_len: 2048 sample_packing: true diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml index 7a2d05d01..27815550b 100644 --- a/examples/phi/phi2-ft.yml +++ b/examples/phi/phi2-ft.yml @@ -12,7 +12,7 @@ datasets: dataset_prepared_path: val_set_size: 0.05 -output_dir: ./phi-sft-out +output_dir: ./outputs/phi-sft-out sequence_len: 2048 sample_packing: true diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml index e44bba745..18e6beaaf 100644 --- a/examples/pythia-12b/config.yml +++ b/examples/pythia-12b/config.yml @@ -26,7 +26,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./pythia-12b +output_dir: ./outputs/pythia-12b gradient_accumulation_steps: 1 micro_batch_size: 1 num_epochs: 5 diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml index 7cb07fe25..0aa650f67 100644 --- a/examples/pythia/lora.yml +++ b/examples/pythia/lora.yml @@ -20,7 +20,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./lora-alpaca-pythia +output_dir: ./outputs/lora-alpaca-pythia gradient_accumulation_steps: 1 micro_batch_size: 4 num_epochs: 4 diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml index da4d784e0..dd8dc1e4f 100644 --- a/examples/qwen/lora.yml +++ b/examples/qwen/lora.yml @@ -13,7 +13,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 2048 # supports up to 8192 sample_packing: false diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml index 501a866b2..01c0c0ab8 100644 --- a/examples/qwen/qlora.yml +++ b/examples/qwen/qlora.yml @@ -13,7 +13,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 2048 # supports up to 8192 sample_packing: false diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml index c59b282d0..452335e38 100644 --- a/examples/qwen/qwen2-moe-lora.yaml +++ b/examples/qwen/qwen2-moe-lora.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 1024 # supports up to 32k sample_packing: false diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml index d6a835a0a..bc11007c7 100644 --- a/examples/qwen/qwen2-moe-qlora.yaml +++ b/examples/qwen/qwen2-moe-qlora.yaml @@ -10,7 +10,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 1024 # supports up to 32k sample_packing: false diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml index 5a42e2a95..ff395a863 100644 --- a/examples/redpajama/config-3b.yml +++ b/examples/redpajama/config-3b.yml @@ -24,7 +24,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./redpajama-alpaca-3b +output_dir: ./outputs/redpajama-alpaca-3b batch_size: 4 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml index bdfe1bd85..9fee099d4 100644 --- a/examples/replit-3b/config-lora.yml +++ b/examples/replit-3b/config-lora.yml @@ -23,7 +23,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./lora-replit +output_dir: ./outputs/lora-replit batch_size: 8 micro_batch_size: 1 num_epochs: 4 diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml index f3fc16f86..777262a7e 100644 --- a/examples/stablelm-2/1.6b/fft.yml +++ b/examples/stablelm-2/1.6b/fft.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: last_run_prepared val_set_size: 0.05 -output_dir: ./out +output_dir: ./outputs/out sequence_len: 4096 sample_packing: true diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml index c5051fab6..c65b9e4cd 100644 --- a/examples/stablelm-2/1.6b/lora.yml +++ b/examples/stablelm-2/1.6b/lora.yml @@ -12,7 +12,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml index 1efdfbc8e..83fc0d89f 100644 --- a/examples/starcoder2/qlora.yml +++ b/examples/starcoder2/qlora.yml @@ -11,7 +11,7 @@ datasets: dataset_prepared_path: val_set_size: 0.2 -output_dir: ./qlora +output_dir: ./outputs/qlora adapter: qlora lora_model_dir: diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml index fd7b02cac..c08be82d3 100644 --- a/examples/tiny-llama/lora-mps.yml +++ b/examples/tiny-llama/lora-mps.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml index 4a16f14b9..c5ff0437e 100644 --- a/examples/tiny-llama/lora.yml +++ b/examples/tiny-llama/lora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./lora-out +output_dir: ./outputs/lora-out sequence_len: 4096 sample_packing: true diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml index 3b68a7f54..e501dcb8e 100644 --- a/examples/tiny-llama/pretrain.yml +++ b/examples/tiny-llama/pretrain.yml @@ -14,7 +14,7 @@ pretraining_dataset: type: pretrain dataset_prepared_path: val_set_size: 0.0 -output_dir: ./model-out +output_dir: ./outputs/model-out sequence_len: 2048 sample_packing: true diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml index 3ea313c83..0d21aca9d 100644 --- a/examples/tiny-llama/qlora.yml +++ b/examples/tiny-llama/qlora.yml @@ -11,7 +11,7 @@ datasets: type: alpaca dataset_prepared_path: val_set_size: 0.05 -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml index e3faa01bd..7e3f83cbd 100644 --- a/examples/xgen-7b/xgen-7b-8k-qlora.yml +++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml @@ -40,7 +40,7 @@ wandb_entity: wandb_watch: wandb_name: wandb_log_model: -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out # QLoRA paper Table 9 # - 16 for 7b & 13b diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml index dc8c37d18..7fe322d63 100644 --- a/examples/yi-34B-chat/qlora.yml +++ b/examples/yi-34B-chat/qlora.yml @@ -33,7 +33,7 @@ eval_sample_packing: false eval_batch_size: 1 # LoRA -output_dir: ./qlora-out +output_dir: ./outputs/qlora-out adapter: qlora lora_model_dir: lora_r: 32 diff --git a/outputs/.gitignore b/outputs/.gitignore new file mode 100644 index 000000000..72e8ffc0d --- /dev/null +++ b/outputs/.gitignore @@ -0,0 +1 @@ +*