fix lora target module, require explicit flash attention, fix min logging steps, don't use adam8bit for int4, hash prepared datasets, support hf hub datasets

This commit is contained in:
Wing Lian
2023-04-17 18:01:12 -04:00
parent 4131183115
commit 87e073d0de
4 changed files with 93 additions and 33 deletions

View File

@@ -21,7 +21,7 @@ lora_alpha: 16
 lora_dropout: 0.05
 lora_target_modules:
   - q_proj
-  - w_proj
+  - v_proj
 lora_fan_in_fan_out: false
 wandb_project: llama-65b-lora
 wandb_watch:

41
configs/llama_7B_4bit.yml Normal file
View File

@@ -0,0 +1,41 @@
# LoRA fine-tuning configuration for the int4 LLaMA 7B checkpoint.

# --- Model and tokenizer ---
# base_model names the int4 checkpoint repo; base_model_config names the
# non-int4 repo (presumably the source of the model config -- confirm
# against the loader).
base_model: decapoda-research/llama-7b-hf-int4
base_model_config: decapoda-research/llama-7b-hf
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# NOTE(review): load_in_8bit is enabled here while load_4bit is also enabled
# at the end of this file -- confirm which flag the loader honours.
load_in_8bit: true

# --- Data ---
# Each entry is a mapping; `type` must be nested under the same list item as
# `path`, otherwise it is parsed as a separate top-level key.
datasets:
  # presumably a Hugging Face Hub dataset id -- verify against the loader
  - path: vicgalle/alpaca-gpt4
    type: alpaca
dataset_prepared_path: data/last_run_prepared
val_set_size: 0.04

# --- Adapter (LoRA) ---
adapter: lora
# Left unset (parses as null).
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len: 1024
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - q_proj
  - v_proj
  # Disabled targets; uncomment to adapt these projections as well.
  # - k_proj
  # - o_proj
lora_fan_in_fan_out: false

# --- Weights & Biases ---
# Project, watch mode and run id are left unset (each parses as null).
wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint

# --- Training ---
output_dir: ./lora-test
batch_size: 8
micro_batch_size: 2
num_epochs: 3
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
gradient_checkpointing: false
early_stopping_patience: 3
# Left unset (parse as null).
resume_from_checkpoint:
local_rank:
# See the review note on load_in_8bit above: both flags are currently true.
load_4bit: true

View File

@@ -21,7 +21,7 @@ lora_alpha: 16
 lora_dropout: 0.05
 lora_target_modules:
   - q_proj
-  - w_proj
+  - v_proj
 lora_fan_in_fan_out: false
 wandb_project: llama-7b-lora
 wandb_watch: