Feat: Add example for Mistral (#644)
* Feat: Add example for Mistral
* chore: turn off flash
* chore: add is_mistral_derived_model
* chore: update following PR
This commit is contained in:
@@ -413,9 +413,10 @@ tokenizer_legacy:
 # this is reported to improve training speed on some models
 resize_token_embeddings_to_32x:
 
-# used to identify if the model is falcon/llama based
+# used to identify which the model is based on
 is_falcon_derived_model:
 is_llama_derived_model:
+is_mistral_derived_model:
 
 # whether you are training a 4-bit GPTQ quantized model
 gptq: true
Reference in New Issue
Block a user