Fix security issue or ignore false positives

This commit is contained in:
NanoCode012
2023-05-29 22:26:26 +09:00
parent 83d29209f7
commit a1f9850b91
3 changed files with 10 additions and 10 deletions

View File

@@ -11,10 +11,10 @@ from transformers import PreTrainedTokenizer
from axolotl.prompters import IGNORE_TOKEN_ID
IGNORE_INDEX = -100
-LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"
-LLAMA_DEFAULT_EOS_TOKEN = "</s>"
-LLAMA_DEFAULT_BOS_TOKEN = "<s>"
-LLAMA_DEFAULT_UNK_TOKEN = "<unk>"
+LLAMA_DEFAULT_PAD_TOKEN = "[PAD]" # nosec
+LLAMA_DEFAULT_EOS_TOKEN = "</s>" # nosec
+LLAMA_DEFAULT_BOS_TOKEN = "<s>" # nosec
+LLAMA_DEFAULT_UNK_TOKEN = "<unk>" # nosec
class InvalidDataException(Exception):

View File

@@ -40,7 +40,7 @@ def load_tokenized_prepared_datasets(
) -> DatasetDict:
tokenizer_name = tokenizer.__class__.__name__
ds_hash = str(
-md5(
+md5( # nosec
(
str(cfg.sequence_len)
+ "@"
@@ -66,7 +66,7 @@ def load_tokenized_prepared_datasets(
use_auth_token=use_auth_token,
)
dataset = dataset["train"]
-except Exception: # pylint: disable=broad-except
+except Exception: # pylint: disable=broad-except # nosec
pass
if dataset:
@@ -272,7 +272,7 @@ def load_prepare_datasets(
# see if we can go ahead and load the stacked dataset
seed = f"@{str(cfg.seed)}" if cfg.seed else ""
ds_hash = str(
-md5(
+md5( # nosec
(
str(cfg.sequence_len)
+ "@"
@@ -304,7 +304,7 @@ def load_prepare_datasets(
use_auth_token=use_auth_token,
)
dataset = dataset["train"]
-except Exception: # pylint: disable=broad-except
+except Exception: # pylint: disable=broad-except # nosec
pass
if dataset: