diff --git a/README.md b/README.md
index 533394f83..403cae4d6 100644
--- a/README.md
+++ b/README.md
@@ -326,6 +326,8 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# Whether to use the legacy tokenizer setting, defaults to True
+tokenizer_legacy:
 # resize the model embeddings when new tokens are added to multiples of 32
 # this is reported to improve training speed on some models
 resize_token_embeddings_to_32x: