diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 7b02804c2..671be4b65 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -7,6 +7,7 @@ on:
       - 'requirements.txt'
       - '.github/workflows/*.yml'
       - "*.md"
+      - "examples/**/*.y[a]?ml"
   workflow_dispatch:
 
 jobs:
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 3b480cf24..9abb6f407 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -14,11 +14,13 @@ unfrozen_parameters:
   - model.layers.4[4-9]+.block_sparse_moe.experts
   - model.layers.5[0-5]+.block_sparse_moe.gate
   - model.layers.5[0-5]+.block_sparse_moe.experts
- 
+
 model_config:
   output_router_logits: true
 
-DATA_STUFF_HERE
+datasets:
+  - path: yahma/alpaca-cleaned
+    type: alpaca
 
 output_dir: ./out
 sequence_len: 8000
diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py
index 4c05113f5..8baba0307 100644
--- a/tests/e2e/patched/test_mixtral_samplepack.py
+++ b/tests/e2e/patched/test_mixtral_samplepack.py
@@ -30,7 +30,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 2048,
@@ -74,7 +74,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 2048,
diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py
index 8384b826f..eecd1b3c1 100644
--- a/tests/e2e/patched/test_model_patches.py
+++ b/tests/e2e/patched/test_model_patches.py
@@ -22,7 +22,7 @@ class TestModelPatches(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sample_packing": True,
                 "sequence_len": 2048,
diff --git a/tests/e2e/test_mixtral.py b/tests/e2e/test_mixtral.py
index 68afcdec4..d4dad14ef 100644
--- a/tests/e2e/test_mixtral.py
+++ b/tests/e2e/test_mixtral.py
@@ -33,7 +33,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "load_in_4bit": True,
@@ -87,7 +87,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": False,
                 "sequence_len": 1024,
                 "load_in_4bit": True,
@@ -141,7 +141,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "adapter": "lora",
@@ -198,7 +198,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": False,
                 "sequence_len": 1024,
                 "adapter": "lora",
@@ -255,7 +255,7 @@ class TestMixtral(unittest.TestCase):
         cfg = DictDefault(
             {
                 "base_model": "hf-internal-testing/Mixtral-tiny",
-                "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
+                "tokenizer_config": "LoneStriker/Mixtral-8x7B-v0.1-HF",
                 "flash_attention": True,
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
diff --git a/tests/prompt_strategies/test_alpaca.py b/tests/prompt_strategies/test_alpaca.py
index 9c97e4052..51dd5900b 100644
--- a/tests/prompt_strategies/test_alpaca.py
+++ b/tests/prompt_strategies/test_alpaca.py
@@ -27,7 +27,9 @@ def fixture_alpaca_dataset():
 @pytest.fixture(name="tokenizer")
 def fixture_tokenizer():
     # pylint: disable=all
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+    tokenizer = AutoTokenizer.from_pretrained(
+        "casperhansen/mistral-7b-instruct-v0.1-awq"
+    )
     tokenizer.add_special_tokens(
         {
             "eos_token": AddedToken(
diff --git a/tests/prompt_strategies/test_raw_io.py b/tests/prompt_strategies/test_raw_io.py
index 8c66538ec..967de169f 100644
--- a/tests/prompt_strategies/test_raw_io.py
+++ b/tests/prompt_strategies/test_raw_io.py
@@ -43,7 +43,9 @@ def fixture_sharegpt_dataset():
 
 @pytest.fixture(name="tokenizer")
 def fixture_tokenizer():
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+    tokenizer = AutoTokenizer.from_pretrained(
+        "casperhansen/mistral-7b-instruct-v0.1-awq"
+    )
     tokenizer.add_tokens(
         [
             AddedToken("", rstrip=False, lstrip=False, normalized=False),
diff --git a/tests/prompt_strategies/test_sharegpt.py b/tests/prompt_strategies/test_sharegpt.py
index 19d63eac8..3ff0eab05 100644
--- a/tests/prompt_strategies/test_sharegpt.py
+++ b/tests/prompt_strategies/test_sharegpt.py
@@ -96,7 +96,9 @@ def fixture_multi_role_dataset():
 
 @pytest.fixture(name="tokenizer")
 def fixture_tokenizer():
-    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+    tokenizer = AutoTokenizer.from_pretrained(
+        "casperhansen/mistral-7b-instruct-v0.1-awq"
+    )
     tokenizer.add_special_tokens(
         {
             "eos_token": AddedToken(
diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py
index 4e659006f..63e9a621b 100644
--- a/tests/test_prompt_tokenizers.py
+++ b/tests/test_prompt_tokenizers.py
@@ -454,7 +454,9 @@ class OrpoTokenizationTest(unittest.TestCase):
 
     def setUp(self) -> None:
         # pylint: disable=duplicate-code
-        tokenizer = LlamaTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
+        tokenizer = LlamaTokenizer.from_pretrained(
+            "casperhansen/mistral-7b-instruct-v0.1-awq"
+        )
         tokenizer.add_special_tokens(
             {
                 "eos_token": AddedToken(