Merge pull request #313 from OpenAccess-AI-Collective/tokenizer-llama2-embeddings
don't resize embeddings to multiples of 32x by default
This commit is contained in:
@@ -322,6 +322,9 @@ tokenizer_type: AutoTokenizer
|
|||||||
trust_remote_code:
|
trust_remote_code:
|
||||||
# use_fast option passed to the tokenizer's from_pretrained when loading; defaults to True
|
# use_fast option passed to the tokenizer's from_pretrained when loading; defaults to True
|
||||||
tokenizer_use_fast:
|
tokenizer_use_fast:
|
||||||
|
# when new tokens are added, resize the model embeddings to a multiple of 32
|
||||||
|
# this is reported to improve training speed on some models
|
||||||
|
resize_token_embeddings_to_32x:
|
||||||
|
|
||||||
# whether you are training a 4-bit GPTQ quantized model
|
# whether you are training a 4-bit GPTQ quantized model
|
||||||
gptq: true
|
gptq: true
|
||||||
|
|||||||
@@ -301,7 +301,11 @@ def load_model(
|
|||||||
**model_kwargs,
|
**model_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
embeddings_len = math.ceil(len(tokenizer) / 32) * 32
|
embeddings_len = (
|
||||||
|
math.ceil(len(tokenizer) / 32) * 32
|
||||||
|
if cfg.resize_token_embeddings_to_32x
|
||||||
|
else len(tokenizer)
|
||||||
|
)
|
||||||
model.resize_token_embeddings(embeddings_len)
|
model.resize_token_embeddings(embeddings_len)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
|||||||
Reference in New Issue
Block a user