From e6b57decbd559dce82bcc39817e668fc9bc2e09e Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Mon, 29 May 2023 14:26:12 +0900
Subject: [PATCH] Lint tokenization

---
 src/axolotl/utils/tokenization.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py
index f23ca8a92..159dbe15d 100644
--- a/src/axolotl/utils/tokenization.py
+++ b/src/axolotl/utils/tokenization.py
@@ -1,5 +1,8 @@
-from termcolor import colored
+"""Module for tokenization utilities"""
+
+
 import logging
+from termcolor import colored
 
 
 def check_dataset_labels(dataset, tokenizer):
@@ -17,7 +20,7 @@ def check_example_labels(example, tokenizer):
     # You can compare the input_ids and labels element-wise
     # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0
     colored_tokens = []
-    for i, (input_id, label_id, mask) in enumerate(
+    for _, (input_id, label_id, mask) in enumerate(
         zip(input_ids, labels, attention_mask)
     ):
         decoded_input_token = tokenizer.decode(input_id)