update outputs path so that we can mount workspace to /workspace/data (#1623)

* update outputs path so that we can mount workspace to /workspace/data

* fix ln order
This commit is contained in:
Wing Lian
2024-05-15 12:44:13 -04:00
committed by GitHub
parent 3319780300
commit 4fde300e5f
70 changed files with 72 additions and 69 deletions

View File

@@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \ printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \ printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \ chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
chmod +x /root/cloud-entrypoint.sh chmod +x /root/cloud-entrypoint.sh && \
mkdir -p /workspace/data/axolotl-artifacts && \
ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs
ENTRYPOINT ["/root/cloud-entrypoint.sh"] ENTRYPOINT ["/root/cloud-entrypoint.sh"]
CMD ["sleep", "infinity"] CMD ["sleep", "infinity"]

View File

@@ -38,7 +38,7 @@ wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: btlm-out output_dir: ./outputs/btlm-out
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 1 num_epochs: 1

View File

@@ -25,7 +25,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
batch_size: 4 batch_size: 4
micro_batch_size: 4 micro_batch_size: 4
num_epochs: 2 num_epochs: 2

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -84,7 +84,7 @@
" type: alpaca\n", " type: alpaca\n",
"dataset_prepared_path:\n", "dataset_prepared_path:\n",
"val_set_size: 0.05\n", "val_set_size: 0.05\n",
"output_dir: ./qlora-out\n", "output_dir: ./outputs/qlora-out\n",
"\n", "\n",
"adapter: qlora\n", "adapter: qlora\n",
"lora_model_dir:\n", "lora_model_dir:\n",

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 512 sequence_len: 512
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 512 sequence_len: 512
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 512 sequence_len: 512
sample_packing: false sample_packing: false

View File

@@ -28,7 +28,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./falcon-7b output_dir: ./outputs/falcon-7b
batch_size: 2 batch_size: 2
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -42,7 +42,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
# QLoRA paper Table 9 # QLoRA paper Table 9
# - 16 for 7b & 13b # - 16 for 7b & 13b

View File

@@ -28,7 +28,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./falcon-7b output_dir: ./outputs/falcon-7b
batch_size: 2 batch_size: 2
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -12,7 +12,7 @@ datasets:
- path: mhenrichsen/alpaca_2k_test - path: mhenrichsen/alpaca_2k_test
type: alpaca type: alpaca
val_set_size: 0.1 val_set_size: 0.1
output_dir: ./out output_dir: ./outputs/out
adapter: qlora adapter: qlora
lora_r: 32 lora_r: 32

View File

@@ -23,7 +23,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
gradient_accumulation_steps: 2 gradient_accumulation_steps: 2
micro_batch_size: 2 micro_batch_size: 2
num_epochs: 2 num_epochs: 2

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 4096 sequence_len: 4096
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 4096 sequence_len: 4096
sample_packing: false sample_packing: false

View File

@@ -21,7 +21,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./jeopardy-bot-7b output_dir: ./outputs/jeopardy-bot-7b
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -33,7 +33,7 @@ wandb_project:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./model-out output_dir: ./outputs/model-out
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lisa-out output_dir: ./outputs/lisa-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./relora-out output_dir: ./outputs/relora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 8192 sequence_len: 8192
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out/qlora-llama3-70b output_dir: ./outputs/out/qlora-llama3-70b
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0 val_set_size: 0
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./out output_dir: ./outputs/out
sequence_len: 2048 sequence_len: 2048
sample_packing: false sample_packing: false

View File

@@ -23,7 +23,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 2048 sequence_len: 2048
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 8192 sequence_len: 8192
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0 val_set_size: 0
output_dir: ./lora-out output_dir: ./outputs/lora-out
eval_sample_packing: false eval_sample_packing: false
adapter: lora adapter: lora

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.1 val_set_size: 0.1
output_dir: ./lora-out output_dir: ./outputs/lora-out
adapter: lora adapter: lora
lora_model_dir: lora_model_dir:

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.02 val_set_size: 0.02
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
model_config: model_config:
output_router_logits: true output_router_logits: true

View File

@@ -16,7 +16,7 @@ datasets:
type: chat_template.argilla type: chat_template.argilla
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.1 val_set_size: 0.1
output_dir: ./mistral-qlora-orpo-out output_dir: ./outputs/mistral-qlora-orpo-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.02 val_set_size: 0.02
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
model_config: model_config:
output_router_logits: true output_router_logits: true

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.02 val_set_size: 0.02
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
model_config: model_config:
output_router_logits: true output_router_logits: true

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
## You can optionally freeze the entire model and unfreeze a subset of parameters ## You can optionally freeze the entire model and unfreeze a subset of parameters
unfrozen_parameters: unfrozen_parameters:

View File

@@ -21,7 +21,7 @@ model_config:
datasets: datasets:
- path: yahma/alpaca-cleaned - path: yahma/alpaca-cleaned
type: alpaca type: alpaca
output_dir: ./out output_dir: ./outputs/out
sequence_len: 8000 sequence_len: 8000
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.1 val_set_size: 0.1
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -23,7 +23,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./mpt-alpaca-7b output_dir: ./outputs/mpt-alpaca-7b
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -25,7 +25,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./openllama-out output_dir: ./outputs/openllama-out
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -31,7 +31,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./lora-out output_dir: ./outputs/lora-out
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 2 micro_batch_size: 2
num_epochs: 4 num_epochs: 4

View File

@@ -25,7 +25,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 2 micro_batch_size: 2
num_epochs: 4 num_epochs: 4

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./phi-sft-out output_dir: ./outputs/phi-sft-out
sequence_len: 2048 sequence_len: 2048
sample_packing: true sample_packing: true

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./phi-sft-out output_dir: ./outputs/phi-sft-out
sequence_len: 2048 sequence_len: 2048
sample_packing: true sample_packing: true

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./phi-sft-out output_dir: ./outputs/phi-sft-out
sequence_len: 2048 sequence_len: 2048
sample_packing: true sample_packing: true

View File

@@ -26,7 +26,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./pythia-12b output_dir: ./outputs/pythia-12b
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 5 num_epochs: 5

View File

@@ -20,7 +20,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./lora-alpaca-pythia output_dir: ./outputs/lora-alpaca-pythia
gradient_accumulation_steps: 1 gradient_accumulation_steps: 1
micro_batch_size: 4 micro_batch_size: 4
num_epochs: 4 num_epochs: 4

View File

@@ -13,7 +13,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 2048 # supports up to 8192 sequence_len: 2048 # supports up to 8192
sample_packing: false sample_packing: false

View File

@@ -13,7 +13,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 2048 # supports up to 8192 sequence_len: 2048 # supports up to 8192
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 1024 # supports up to 32k sequence_len: 1024 # supports up to 32k
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 1024 # supports up to 32k sequence_len: 1024 # supports up to 32k
sample_packing: false sample_packing: false

View File

@@ -24,7 +24,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./redpajama-alpaca-3b output_dir: ./outputs/redpajama-alpaca-3b
batch_size: 4 batch_size: 4
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -23,7 +23,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./lora-replit output_dir: ./outputs/lora-replit
batch_size: 8 batch_size: 8
micro_batch_size: 1 micro_batch_size: 1
num_epochs: 4 num_epochs: 4

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: last_run_prepared dataset_prepared_path: last_run_prepared
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./out output_dir: ./outputs/out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -12,7 +12,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.2 val_set_size: 0.2
output_dir: ./qlora output_dir: ./outputs/qlora
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0 val_set_size: 0
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./lora-out output_dir: ./outputs/lora-out
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -14,7 +14,7 @@ pretraining_dataset:
type: pretrain type: pretrain
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.0 val_set_size: 0.0
output_dir: ./model-out output_dir: ./outputs/model-out
sequence_len: 2048 sequence_len: 2048
sample_packing: true sample_packing: true

View File

@@ -11,7 +11,7 @@ datasets:
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:

View File

@@ -40,7 +40,7 @@ wandb_entity:
wandb_watch: wandb_watch:
wandb_name: wandb_name:
wandb_log_model: wandb_log_model:
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
# QLoRA paper Table 9 # QLoRA paper Table 9
# - 16 for 7b & 13b # - 16 for 7b & 13b

View File

@@ -33,7 +33,7 @@ eval_sample_packing: false
eval_batch_size: 1 eval_batch_size: 1
# LoRA # LoRA
output_dir: ./qlora-out output_dir: ./outputs/qlora-out
adapter: qlora adapter: qlora
lora_model_dir: lora_model_dir:
lora_r: 32 lora_r: 32

1
outputs/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
*