Train parameters exclusively in specific ranges (#1390)

* Train parameters exclusively in specific ranges

* Fix the style and update docs

* Update yaml example
This commit is contained in:
Seungduk Kim
2024-03-15 00:05:42 +09:00
committed by GitHub
parent 3bd8203c35
commit 05bcc9ea56
4 changed files with 492 additions and 19 deletions

View File

@@ -16,12 +16,12 @@ output_dir: ./qlora-out
## You can optionally freeze the entire model and unfreeze a subset of parameters
unfrozen_parameters:
# - lm_head.*
# - model.embed_tokens.*
# - model.layers.2[0-9]+.block_sparse_moe.gate.*
# - model.layers.2[0-9]+.block_sparse_moe.experts.*
# - model.layers.3[0-9]+.block_sparse_moe.gate.*
# - model.layers.3[0-9]+.block_sparse_moe.experts.*
# - ^lm_head.weight$
# - ^model.embed_tokens.weight$[:32000]
# - model.layers.2[0-9]+.block_sparse_moe.gate
# - model.layers.2[0-9]+.block_sparse_moe.experts
# - model.layers.3[0-9]+.block_sparse_moe.gate
# - model.layers.3[0-9]+.block_sparse_moe.experts
model_config:
output_router_logits: true