fix double eos token for chatml (#1054) [skip ci]
* fix double eos token for chatml * isolate fix to chatml conversation * fix add special tokens to include rstrip * add test for train_on_inputs for sharegpt * don't use rstrip for chatml
This commit is contained in:
@@ -392,9 +392,13 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
||||
# this should be the assistant response, should end with an eos token
|
||||
if not content.strip():
|
||||
LOG.warning(f"assistant turn has empty text: {prompt}")
|
||||
add_eos_token = not (
|
||||
conversation.name == "chatml"
|
||||
and conversation.sep == self.tokenizer.eos_token
|
||||
)
|
||||
res = self._tokenize(
|
||||
turn,
|
||||
add_eos_token=True,
|
||||
add_eos_token=add_eos_token,
|
||||
strip_bos_token=True,
|
||||
)
|
||||
role_res = self._tokenize(
|
||||
|
||||
Reference in New Issue
Block a user