Compare commits

...

1 Commit

Author SHA1 Message Date
mhenrichsen
9084879861 tinyllama 2023-11-16 13:36:01 +00:00

View File

@@ -4,19 +4,20 @@ model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
is_llama_derived_model: true is_llama_derived_model: true
load_in_8bit: true load_in_8bit: false
load_in_4bit: false load_in_4bit: false
strict: false strict: false
datasets: datasets:
- path: mhenrichsen/alpaca_2k_test - path: mhenrichsen/context-aware-splits-english
type: alpaca type: alpaca
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 200
output_dir: ./lora-out output_dir: ./tiny-llama
sequence_len: 4096 sequence_len: 8192
sample_packing: true sample_packing: true
pad_to_sequence_len: true
adapter: lora adapter: lora
lora_model_dir: lora_model_dir:
@@ -32,9 +33,9 @@ wandb_watch:
wandb_run_id: wandb_run_id:
wandb_log_model: wandb_log_model:
gradient_accumulation_steps: 4 gradient_accumulation_steps: 1
micro_batch_size: 2 micro_batch_size: 8
num_epochs: 4 num_epochs: 3
optimizer: adamw_bnb_8bit optimizer: adamw_bnb_8bit
lr_scheduler: cosine lr_scheduler: cosine
learning_rate: 0.0002 learning_rate: 0.0002
@@ -53,13 +54,13 @@ logging_steps: 1
xformers_attention: xformers_attention:
flash_attention: true flash_attention: true
warmup_steps: 10 warmup_steps: 50
eval_steps: 0.05 eval_steps: 0.05
eval_table_size: eval_table_size:
save_steps: save_steps: 0.50
debug: debug:
deepspeed: deepspeed:
weight_decay: 0.0 weight_decay: 0.1
fsdp: fsdp:
fsdp_config: fsdp_config:
special_tokens: special_tokens: