Add unit tests for cumulative sequence lengths (cu_seq_lens), add the ability to build cu_seq_lens from position IDs, and fix the prompt test

This commit is contained in:
Wing Lian
2023-08-06 17:33:40 -04:00
parent c70dae63cc
commit 394a65f11f
5 changed files with 144 additions and 45 deletions

View File

@@ -134,9 +134,15 @@ class InstructionWSystemPromptTokenizingStrategyTest(unittest.TestCase):
"output": "Hi! How can I help?",
}
example = strat.tokenize_prompt(sample)
assert example["input_ids"][0:4] == [1, 835, 2184, 29901] # "<s>### System:"
assert example["input_ids"][5:7] == [1509, 20118] # "use cot"
assert example["input_ids"][9] == 11889 # USER
assert example["input_ids"][0:5] == [
1,
28962,
1254,
12665,
29901,
] # "<s>SYSTEM:"
assert example["input_ids"][5:7] == [671, 20118] # " use cot"
assert example["input_ids"][8] == 11889 # USER
class Llama2ChatTokenizationTest(unittest.TestCase):