Falcon embeddings (#1149) [skip docker]

* also fix multipack for falcon and add smoke tests

* make sure to handle special tokens and added tokens for LoRA

* fix reference to model_type

* fix tests for falcon

* fix stray typo

* fixes for smoke tests

This commit is contained in:
Wing Lian
2024-01-22 21:01:42 -05:00
committed by GitHub
parent 0f77b8d798
commit e799e08d3c
10 changed files with 326 additions and 19 deletions

View File

@@ -32,6 +32,7 @@ class TestMixtral(unittest.TestCase):
"base_model": "hf-internal-testing/Mixtral-tiny",
"tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
"flash_attention": True,
"sample_packing": True,
"sequence_len": 2048,
"load_in_4bit": True,
"adapter": "qlora",
@@ -57,7 +58,6 @@ class TestMixtral(unittest.TestCase):
"max_steps": 20,
"save_steps": 10,
"eval_steps": 10,
"sample_packing": True,
"bf16": "auto",
}
)
@@ -76,6 +76,7 @@ class TestMixtral(unittest.TestCase):
"base_model": "hf-internal-testing/Mixtral-tiny",
"tokenizer_config": "mistralai/Mixtral-8x7B-v0.1",
"flash_attention": True,
"sample_packing": True,
"sequence_len": 2048,
"val_set_size": 0.1,
"special_tokens": {},
@@ -95,7 +96,6 @@ class TestMixtral(unittest.TestCase):
"max_steps": 20,
"save_steps": 10,
"eval_steps": 10,
"sample_packing": True,
"bf16": "auto",
}
)