fix pylint

This commit is contained in:
Johan Hansson
2024-01-11 16:54:03 +01:00
parent 4fa557889c
commit cd2cda3cda
2 changed files with 4 additions and 44 deletions

View File

@@ -3,12 +3,11 @@ Test module for alpacha integration w chatml
""" """
import pytest import pytest
from datasets import Dataset from datasets import Dataset
from tokenizers import AddedToken
from transformers import AutoTokenizer
from axolotl.datasets import TokenizedPromptDataset from axolotl.datasets import TokenizedPromptDataset
from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy
from axolotl.prompters import AlpacaPrompter from axolotl.prompters import AlpacaPrompter
from utils import fixture_tokenizer
@pytest.fixture(name="alpacha_dataset") @pytest.fixture(name="alpacha_dataset")
@@ -24,25 +23,6 @@ def fixture_alpacha_dataset():
) )
@pytest.fixture(name="tokenizer")
def fixture_tokenizer():
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.add_special_tokens(
{
"eos_token": AddedToken(
"<|im_end|>", rstrip=False, lstrip=False, normalized=False
)
}
)
tokenizer.add_tokens(
[
AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False),
]
)
return tokenizer
class TestAlpacha: class TestAlpacha:
""" """
Test class for alpacha prompter Test class for alpacha prompter

View File

@@ -3,12 +3,11 @@ Test module for sharegpt integration w chatml
""" """
import pytest import pytest
from datasets import Dataset from datasets import Dataset
from tokenizers import AddedToken
from transformers import AutoTokenizer
from axolotl.datasets import TokenizedPromptDataset from axolotl.datasets import TokenizedPromptDataset
from axolotl.prompt_strategies.sharegpt import SimpleShareGPTPromptTokenizingStrategy from axolotl.prompt_strategies.sharegpt import SimpleShareGPTPromptTokenizingStrategy
from axolotl.prompters import ShareGPTPrompterV2 from axolotl.prompters import ShareGPTPrompterV2
from utils import fixture_tokenizer
@pytest.fixture(name="sharegpt_dataset") @pytest.fixture(name="sharegpt_dataset")
@@ -43,25 +42,6 @@ def fixture_sharegpt_dataset():
) )
@pytest.fixture(name="tokenizer")
def fixture_tokenizer():
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.add_special_tokens(
{
"eos_token": AddedToken(
"<|im_end|>", rstrip=False, lstrip=False, normalized=False
)
}
)
tokenizer.add_tokens(
[
AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False),
]
)
return tokenizer
class TestSharegpt: class TestSharegpt:
""" """
Test class for sharegpt prompter Test class for sharegpt prompter
@@ -96,7 +76,7 @@ class TestSharegpt:
] ]
# fmt: on # fmt: on
def test_w_train_on_input(self, sharegpt_dataset, tokenizer): def test_no_train_on_input(self, sharegpt_dataset, tokenizer):
strategy = SimpleShareGPTPromptTokenizingStrategy( strategy = SimpleShareGPTPromptTokenizingStrategy(
ShareGPTPrompterV2( ShareGPTPrompterV2(
conversation="chatml", conversation="chatml",
@@ -124,7 +104,7 @@ class TestSharegpt:
] ]
# fmt: on # fmt: on
def test_no_train_on_input(self, sharegpt_dataset, tokenizer): def test_w_train_on_input(self, sharegpt_dataset, tokenizer):
strategy = SimpleShareGPTPromptTokenizingStrategy( strategy = SimpleShareGPTPromptTokenizingStrategy(
ShareGPTPrompterV2( ShareGPTPrompterV2(
conversation="chatml", conversation="chatml",