update docs for tokenizer_legacy (#401)

* update docs for tokenizer_legacy

* add default info
This commit is contained in:
Wing Lian
2023-08-15 09:34:42 -04:00
committed by GitHub
parent 7ad37cb6d7
commit 47961fdb8b

View File

@@ -326,6 +326,8 @@ tokenizer_type: AutoTokenizer
trust_remote_code:
# use_fast option for loading the tokenizer via from_pretrained, defaults to True
tokenizer_use_fast:
# Whether to use the legacy tokenizer setting, defaults to True
tokenizer_legacy:
# when new tokens are added, resize the model embeddings to a multiple of 32
# this is reported to improve training speed on some models
resize_token_embeddings_to_32x: