From 47961fdb8b57b05ff876edb0b96c44e3253348d7 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Tue, 15 Aug 2023 09:34:42 -0400
Subject: [PATCH] update docs for tokenizer_legacy (#401)

* update docs for tokenizer_legacy

* add default info
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 533394f83..403cae4d6 100644
--- a/README.md
+++ b/README.md
@@ -326,6 +326,8 @@ tokenizer_type: AutoTokenizer
 trust_remote_code:
 # use_fast option for tokenizer loading from_pretrained, default to True
 tokenizer_use_fast:
+# Whether to use the legacy tokenizer setting, defaults to True
+tokenizer_legacy:
 # resize the model embeddings when new tokens are added to multiples of 32
 # this is reported to improve training speed on some models
 resize_token_embeddings_to_32x: