Phi2 multipack (#1173)

* phi2 multipack

* update validation and examples for phi

* more updates to phi examples

* make sure to use the correct collator for phi multipack

* phi needs attention mask now for multipack

* if the special token already exists in the tokenizer, don't require in lora modules to save

* fix qlora yml for phi, fix phi test validation

* test qlora too

* make sure flash attention is enabled for the test

* don't use remote code for phi anymore

* reduce sequence len for sample packing phi
This commit is contained in:
Wing Lian
2024-01-23 12:54:36 -05:00
committed by GitHub
parent b715cd549a
commit 814aee6603
18 changed files with 201 additions and 2269 deletions

View File

@@ -1,8 +1,6 @@
base_model: microsoft/phi-1_5
model_type: PhiForCausalLM
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
is_llama_derived_model: false
trust_remote_code: true
load_in_8bit: false
load_in_4bit: false
@@ -18,7 +16,7 @@ output_dir: ./phi-sft-out
sequence_len: 2048
sample_packing: true
pad_to_sequence_len:
pad_to_sequence_len: true
adapter:
lora_model_dir:
@@ -35,7 +33,7 @@ wandb_name:
wandb_log_model:
gradient_accumulation_steps: 1
micro_batch_size: 1
micro_batch_size: 2
num_epochs: 4
optimizer: adamw_torch
adam_beta2: 0.95
@@ -45,18 +43,20 @@ lr_scheduler: cosine
learning_rate: 0.000003
train_on_inputs: false
group_by_length: true
group_by_length: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing:
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: True
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention:
flash_attention: true
warmup_steps: 100
evals_per_epoch: 4
@@ -68,7 +68,4 @@ fsdp:
fsdp_config:
resize_token_embeddings_to_32x: true
special_tokens:
bos_token: "<|endoftext|>"
eos_token: "<|endoftext|>"
unk_token: "<|endoftext|>"
pad_token: "<|endoftext|>"