Fix drop_long_seq bug due to truncation in prompt tokenization strategies when using chat_template (#1867)
This commit is contained in:
@@ -350,7 +350,8 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
|
|||||||
),
|
),
|
||||||
"roles": ds_cfg.get("roles"),
|
"roles": ds_cfg.get("roles"),
|
||||||
"drop_system_message": ds_cfg.get("drop_system_message", False),
|
"drop_system_message": ds_cfg.get("drop_system_message", False),
|
||||||
"max_length": cfg.sequence_len,
|
# we need to add one to detect sequences that exceed the `sequence_len` limit.
|
||||||
|
"max_length": cfg.sequence_len + 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
strategy_params = {
|
strategy_params = {
|
||||||
|
|||||||
Reference in New Issue
Block a user