diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index b7f985cbf..b4a121c98 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -15,7 +15,7 @@ val_set_size: 0.01
 output_dir: ./lora-out
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
 
 adapter: lora
 lora_model_dir:
@@ -49,8 +49,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention: true
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -64,4 +64,3 @@ special_tokens:
   bos_token: "<s>"
   eos_token: "</s>"
   unk_token: "<unk>"
-  pad_token: ""
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index b6a1f69f2..9a1c4c8c3 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -18,7 +18,8 @@ adapter: qlora
 lora_model_dir:
 
 sequence_len: 4096
-max_packed_sequence_len: 4096
+sample_packing: true
+
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
@@ -50,8 +51,8 @@ early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
-xformers_attention: true
-flash_attention:
+xformers_attention:
+flash_attention: true
 
 warmup_steps: 10
 eval_steps: 20
@@ -65,4 +66,3 @@ special_tokens:
   bos_token: "<s>"
   eos_token: "</s>"
   unk_token: "<unk>"
-  pad_token: ""