diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml index e4384a893..071b2bc05 100644 --- a/examples/code-llama/13b/lora.yml +++ b/examples/code-llama/13b/lora.yml @@ -15,7 +15,7 @@ dataset_prepared_path: last_run_prepared val_set_size: 0.01 output_dir: ./lora-out -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml index 8e482a22e..3de4b3b4b 100644 --- a/examples/code-llama/13b/qlora.yml +++ b/examples/code-llama/13b/qlora.yml @@ -18,7 +18,7 @@ output_dir: ./qlora-out adapter: qlora lora_model_dir: -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml index 8a5c457f6..e52c11181 100644 --- a/examples/code-llama/34b/lora.yml +++ b/examples/code-llama/34b/lora.yml @@ -15,7 +15,7 @@ dataset_prepared_path: last_run_prepared val_set_size: 0.01 output_dir: ./lora-out -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml index b0d91fae9..7e6da2e96 100644 --- a/examples/code-llama/34b/qlora.yml +++ b/examples/code-llama/34b/qlora.yml @@ -18,7 +18,7 @@ output_dir: ./qlora-out adapter: qlora lora_model_dir: -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml index 1e09555f7..5e9d12e7c 100644 --- a/examples/code-llama/7b/lora.yml +++ b/examples/code-llama/7b/lora.yml @@ -15,7 +15,7 @@ dataset_prepared_path: last_run_prepared val_set_size: 0.01 output_dir: ./lora-out -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml index fc9a5eb53..b24b5c162 100644 --- a/examples/code-llama/7b/qlora.yml +++ b/examples/code-llama/7b/qlora.yml @@ -18,7 +18,7 @@ output_dir: ./qlora-out adapter: qlora lora_model_dir: -sequence_len: 100000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true