Fix: Higher vram usage for mistral and sample_packing (#691)
* Fix: Higher vram usage for mistral and sample_packing * chore: update comment * chore: lint
This commit is contained in:
@@ -36,10 +36,10 @@ lora_target_modules:
|
||||
- k_proj
|
||||
- o_proj
|
||||
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_project:
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
wandb_run_id:
|
||||
wandb_run_id:
|
||||
wandb_log_model:
|
||||
|
||||
gradient_accumulation_steps: 4
|
||||
@@ -76,4 +76,4 @@ fsdp_config:
|
||||
special_tokens:
|
||||
bos_token: "<s>"
|
||||
eos_token: "</s>"
|
||||
unk_token: "<unk>"
|
||||
unk_token: "<unk>"
|
||||
|
||||
Reference in New Issue
Block a user