Fix security issue and suppress false positives

2023-05-29 22:26:26 +09:00
parent 83d29209f7
commit a1f9850b91
3 changed files with 10 additions and 10 deletions
--- a/scripts/finetune.py
+++ b/scripts/finetune.py
@@ -136,7 +136,7 @@ def train(

    # load the config from the yaml file
    with open(config, encoding="utf-8") as file:
-        cfg: DictDefault = DictDefault(yaml.load(file, Loader=yaml.Loader))
+        cfg: DictDefault = DictDefault(yaml.safe_load(file))
    # if there are any options passed in the cli, if it is something that seems valid from the yaml,
    # then overwrite the value
    cfg_keys = cfg.keys()
@@ -185,7 +185,7 @@ def train(
        logging.info("check_dataset_labels...")
        check_dataset_labels(
            train_dataset.select(
-                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
+                [random.randrange(0, len(train_dataset) - 1) for _ in range(5)]  # nosec
            ),
            tokenizer,
        )
--- a/src/axolotl/prompt_tokenizers.py
+++ b/src/axolotl/prompt_tokenizers.py
@@ -11,10 +11,10 @@ from transformers import PreTrainedTokenizer
 from axolotl.prompters import IGNORE_TOKEN_ID

 IGNORE_INDEX = -100
-LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"
-LLAMA_DEFAULT_EOS_TOKEN = "</s>"
-LLAMA_DEFAULT_BOS_TOKEN = "<s>"
-LLAMA_DEFAULT_UNK_TOKEN = "<unk>"
+LLAMA_DEFAULT_PAD_TOKEN = "[PAD]"  # nosec
+LLAMA_DEFAULT_EOS_TOKEN = "</s>"  # nosec
+LLAMA_DEFAULT_BOS_TOKEN = "<s>"  # nosec
+LLAMA_DEFAULT_UNK_TOKEN = "<unk>"  # nosec


 class InvalidDataException(Exception):
--- a/src/axolotl/utils/data.py
+++ b/src/axolotl/utils/data.py
@@ -40,7 +40,7 @@ def load_tokenized_prepared_datasets(
 ) -> DatasetDict:
    tokenizer_name = tokenizer.__class__.__name__
    ds_hash = str(
-        md5(
+        md5(  # nosec
            (
                str(cfg.sequence_len)
                + "@"
@@ -66,7 +66,7 @@ def load_tokenized_prepared_datasets(
                use_auth_token=use_auth_token,
            )
            dataset = dataset["train"]
-    except Exception:  # pylint: disable=broad-except
+    except Exception:  # pylint: disable=broad-except # nosec
        pass

    if dataset:
@@ -272,7 +272,7 @@ def load_prepare_datasets(
        # see if we can go ahead and load the stacked dataset
        seed = f"@{str(cfg.seed)}" if cfg.seed else ""
        ds_hash = str(
-            md5(
+            md5(  # nosec
                (
                    str(cfg.sequence_len)
                    + "@"
@@ -304,7 +304,7 @@ def load_prepare_datasets(
                    use_auth_token=use_auth_token,
                )
                dataset = dataset["train"]
-        except Exception:  # pylint: disable=broad-except
+        except Exception:  # pylint: disable=broad-except # nosec
            pass

        if dataset: