Train parameters exclusively in specific ranges (#1390)
* Train parameters exclusively in specific ranges
* Fix the style and update docs
* Update yaml example
This commit is contained in:
@@ -16,12 +16,12 @@ output_dir: ./qlora-out
|
||||
|
||||
## You can optionally freeze the entire model and unfreeze a subset of parameters
|
||||
unfrozen_parameters:
|
||||
# - lm_head.*
|
||||
# - model.embed_tokens.*
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.gate.*
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.experts.*
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.gate.*
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.experts.*
|
||||
# - ^lm_head.weight$
|
||||
# - ^model.embed_tokens.weight$[:32000]
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.gate
|
||||
# - model.layers.2[0-9]+.block_sparse_moe.experts
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.gate
|
||||
# - model.layers.3[0-9]+.block_sparse_moe.experts
|
||||
|
||||
model_config:
|
||||
output_router_logits: true
|
||||
|
||||
Reference in New Issue
Block a user