Compare commits

Comparing rala...pytest-eac (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 79612da5c8 |  |
.gitignore (4 changes, vendored)

```diff
@@ -1,7 +1,6 @@
 **/axolotl.egg-info
 configs
 last_run_prepared/
-outputs
 .vscode
 _site/
 
@@ -186,6 +185,3 @@ out/
 
 # vim
 *.swp
-
-# symlinked to axolotl-artifacts in docker containers
-outputs
```
cicd/cicd.sh (10 changes)

```diff
@@ -4,6 +4,12 @@ set -e
 python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
 
 pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
+# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
+pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
+pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
 pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
+
+tests=$(pytest --collect-only -q tests/e2e/each)
+for t in $tests; do
+  pytest $t
+done
```
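The new tail of the script collects the node IDs under `tests/e2e/each` and runs each one in its own pytest process, so GPU state and fixtures from one e2e test cannot leak into the next. Below is a minimal sketch of that collect-then-run-each pattern with failure bookkeeping added; the `grep '::'` filter and the `failed` array are illustrative assumptions, not part of cicd.sh:

```bash
#!/usr/bin/env bash
set -e

# `pytest --collect-only -q` prints one node ID per line plus a summary
# footer; keeping only lines containing '::' drops the footer. (Assumed filter.)
tests=$(pytest --collect-only -q tests/e2e/each | grep '::' || true)

failed=()
for t in $tests; do
  # A fresh interpreter per test isolates CUDA allocations and monkeypatches.
  pytest "$t" || failed+=("$t")
done

if [ "${#failed[@]}" -gt 0 ]; then
  printf 'failed: %s\n' "${failed[@]}"
  exit 1
fi
```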
```diff
@@ -1,6 +1,6 @@
 """
 modal application to run axolotl gpu tests in Modal
 """
 # pylint: disable=duplicate-code
 
 import os
```
```diff
@@ -127,40 +127,34 @@ datasets:
 # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
 # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
 chat_template: tokenizer_default
-# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
+# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
 chat_template_jinja:
-# Key containing the messages (default: "messages")
+# The key in the data example that contains the messages. Default is "messages".
 field_messages: messages
-# Key for role in each message (default: "role")
+# The key in the message turn that contains the role. Default is "role".
 message_field_role: role
-# Key for content in each message (default: "content")
+# The key in the message turn that contains the content. Default is "content".
 message_field_content: content
-# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
+# Optional[Dict[str, List]]. Roles mapping for the messages.
 roles:
   user: ["human", "user"]
-  assistant: ["gpt", "assistant"]
+  assistant: ["gpt", "assistant", "ai"]
   system: ["system"]
-  tool: ["tool"]
 
-# IMPORTANT: The following fields determine which parts of the conversation to train on.
-# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
-# See examples at `docs/dataset-formats/conversation.qmd`
-# Note: If the below 4 fields are empty, defaults to training only on the last message.
+## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
 
 # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
-roles_to_train: ["assistant"] # default
+roles_to_train: ["gpt", "assistant"]
 # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
 # - all: train on all EOS tokens
-# - turn (default): train on the EOS token at the end of each trainable turn
+# - turn: train on the EOS token at the end of each trainable turn
 # - last: train on the last EOS token in the conversation
 train_on_eos: last
 # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
 message_field_training: training
 # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
 # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
+# See example at `docs/dataset-formats/conversation.qmd`
 message_field_training_detail: train_detail
```
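Pulling the documented fields together, a minimal `chat_template` dataset entry might look like the sketch below; the dataset path is hypothetical and the remaining values mirror the defaults named in the comments above:

```yaml
# Minimal sketch of a chat_template dataset entry; `my-org/my-chat-dataset`
# is a hypothetical path, the other values follow the documented defaults.
chat_template: tokenizer_default
datasets:
  - path: my-org/my-chat-dataset
    type: chat_template
    field_messages: messages
    message_field_role: role
    message_field_content: content
    roles:
      user: ["human", "user"]
      assistant: ["gpt", "assistant"]
      system: ["system"]
    roles_to_train: ["gpt", "assistant"]
    train_on_eos: turn
```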
```diff
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200
 
-# Use batch flattening for speedups when not using sample_packing
-batch_flattening:
-
 # Passed through to transformers when loading the model when launched without accelerate
 # Use `sequential` when training w/ model parallelism to limit memory
 device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
 output_dir: ./completed-model
 
 # Whether to use torch.compile and which backend to use
-# setting to `auto` will enable torch compile when torch>=2.5.1
-torch_compile: # Optional[Union[Literal["auto"], bool]]
+torch_compile: # bool
 torch_compile_backend: # Optional[str]
 
 # Training hyperparameters
```
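The new comment types `torch_compile` as a plain bool, with the backend in its own key. A two-line sketch; `inductor` is PyTorch's standard default backend and is shown only as an example value:

```yaml
# Sketch: enable torch.compile explicitly; `inductor` is an example backend.
torch_compile: true
torch_compile_backend: inductor
```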
```diff
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
 eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
 
-profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
-# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
-# snapshots can be visualized @ https://pytorch.org/memory_viz
-
 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
```
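The two watchdog fields work as a pair; a sketch with illustrative values, following the comment's rule of thumb of roughly twice the starting loss:

```yaml
# Illustrative values only: abort after 3 consecutive steps with loss
# above 5.0 (about 2x a starting loss of ~2.5, per the guidance above).
loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
```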
````diff
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
 datasets:
   - path: ...
     type: chat_template
-    roles_to_train:
-    train_on_eos:
 ```
 
 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
 datasets:
   - path: ...
     type: chat_template
-    roles_to_train: ["assistant"] # default value
+    roles_to_train: ["assistant"]
 ```
 
 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
 datasets:
   - path: ...
     type: chat_template
+    roles_to_train: ["assistant"]
 ```
 
 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
 datasets:
   - path: ...
     type: chat_template
+    roles_to_train: ["assistant"]
 ```
 
 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
````
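For case 5, the fine-grained controls are the ones from the config reference above: `message_field_training` skips or forces whole turns, while `message_field_training_detail` selects character ranges inside one turn. A sketch of a single conversation record using both; the text and offsets are made up, and the `training`/`train_detail` key names follow the config documented earlier:

```yaml
# Hypothetical data record illustrating fine-grained training control.
messages:
  - role: user
    content: "What is the capital of France?"
  - role: assistant
    training: false        # `message_field_training`: exclude this turn
    content: "Let me check."
  - role: assistant
    content: "The capital of France is Paris."
    train_detail:          # `message_field_training_detail`
      - begin_offset: 0    # start character index in content
        end_offset: 31     # end character index in content
        train: true
```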
```diff
@@ -1,10 +1,6 @@
 base_model: cerebras/btlm-3b-8k-base
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
@@ -1,7 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: true
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,6 +1,4 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,7 +1,4 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true
-
 load_in_8bit: true
 load_in_4bit: false
```
```diff
@@ -1,15 +1,10 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 # enable 4bit for QLoRA
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true
-
 load_in_8bit: false
 load_in_4bit: false
@@ -1,10 +1,7 @@
 # use google/gemma-7b if you have access
 base_model: mhenrichsen/gemma-7b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-9b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-2b
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForSequenceClassification
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,7 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
@@ -1,7 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,6 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,8 +1,5 @@
 base_model: ai21labs/AI21-Jamba-1.5-Large
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_4bit: true
 strict: false
@@ -1,10 +1,6 @@
 base_model: huggyllama/llama-7b
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 datasets:
   - path: openaccess-ai-collective/jeopardy
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,13 +1,8 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 gptq: true
 gptq_disable_exllama: true
+model_type: AutoModelForCausalLM
+tokenizer_type: LlamaTokenizer
 tokenizer_use_fast: true
 tokenizer_legacy: true
 load_in_8bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,5 @@
 base_model: alpindale/Llama-3.2-11B-Vision-Instruct
-# optionally might have model_type or tokenizer_type or processor_type
 processor_type: AutoProcessor
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
```
```diff
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 plugins:
   - axolotl.integrations.liger.LigerPlugin
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B-Instruct
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,6 +1,4 @@
 base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,6 +1,4 @@
 base_model: meta-llama/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,6 +1,4 @@
 base_model: NousResearch/Llama-3.2-1B
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,8 +1,5 @@
 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_4bit: true
 strict: false
@@ -1,9 +1,6 @@
 base_model: casperhansen/llama-3-70b-fp16
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
```
```diff
@@ -1,10 +1,7 @@
 base_model: state-spaces/mamba-2.8b
-# optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -4,11 +4,8 @@
 #face problems with the special tokens.
 
 base_model: mistralai/Mistral-7B-Instruct-v0.2
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true
 
 load_in_8bit: false
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
-# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,5 @@
 base_model: mosaicml/mpt-7b
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
```
```diff
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 strict: false
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 load_in_4bit: false
 strict: false
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: true
 strict: false
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3.5-mini-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-2
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
@@ -1,11 +1,7 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
-# optionally might have model_type or tokenizer_type
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 chat_template: phi_3
 
 load_in_8bit: false
@@ -1,11 +1,7 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch* # prefer safetensors
-# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: false
 load_in_4bit: false
 gptq: false
@@ -1,7 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
-# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
 
 trust_remote_code: true
 
```
@@ -1,9 +1,6 @@
|
|||||||
base_model: Qwen/Qwen-7B
|
base_model: Qwen/Qwen-7B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: Qwen/Qwen2.5-0.5B
|
base_model: Qwen/Qwen2.5-0.5B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
strict: false
|
strict: false
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen2-7B
|
base_model: Qwen/Qwen2-7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
|
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: GPTNeoXForCausalLM
|
model_type: GPTNeoXForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code:
|
trust_remote_code:
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: replit/replit-code-v1-3b
|
base_model: replit/replit-code-v1-3b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: stabilityai/stablelm-2-1_6b
|
base_model: stabilityai/stablelm-2-1_6b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: stabilityai/stablelm-2-1_6b
|
base_model: stabilityai/stablelm-2-1_6b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: bigcode/starcoder2-3b
|
base_model: bigcode/starcoder2-3b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: TinyLlama/TinyLlama_v1.1
|
base_model: TinyLlama/TinyLlama_v1.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
 base_model: TinyLlama/TinyLlama_v1.1
-# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: true
 load_in_4bit: false
@@ -1,9 +1,7 @@
 base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: false
@@ -1,9 +1,6 @@
 base_model: TinyLlama/TinyLlama_v1.1
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
@@ -1,14 +1,9 @@
 # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
 # on Tim Dettmer's Guanaco dataset.
 base_model: Salesforce/xgen-7b-8k-base
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name
-
-trust_remote_code: true
-
 load_in_8bit: false
 # enable 4bit for QLoRA
 load_in_4bit: true
@@ -1,9 +1,6 @@
 base_model: 01-ai/Yi-34B-Chat
-# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# Automatically upload checkpoint and final model to HF
-# hub_model_id: username/custom_model_name

 load_in_8bit: false
 load_in_4bit: true
@@ -7,31 +7,26 @@ mamba-ssm==1.2.0.post1
 flash-attn==2.7.0.post2
 xformers>=0.0.23.post1
 autoawq==0.2.7.post3
-liger-kernel==0.5.2
+liger-kernel==0.4.2
 # END section

 packaging==23.2

 peft==0.14.0
-transformers==4.47.1
+transformers==4.47.0
 tokenizers>=0.20.1
-accelerate==1.2.1
+accelerate==1.2.0
 datasets==3.1.0
 deepspeed==0.16.1
-trl==0.12.1
-
-optimum==1.16.2
-hf_transfer
-sentencepiece
-gradio==3.50.2
-
 pydantic==2.6.3
 addict
 fire
 PyYAML>=6.0
 requests
+sentencepiece
 wandb
 einops
+optimum==1.16.2
+hf_transfer
 colorama
 numba
 numpy>=1.24.4,<=2.0.1
@@ -41,6 +36,7 @@ scipy
 scikit-learn==1.4.2
 nvidia-ml-py==12.560.30
 art
+gradio==3.50.2
 tensorboard
 python-dotenv==1.0.1

@@ -49,6 +45,7 @@ s3fs>=2024.5.0
 gcsfs>=2024.5.0
 # adlfs

+trl==0.12.1
 zstandard==0.22.0
 fastcore

@@ -58,7 +55,5 @@ langdetect==1.0.9
 immutabledict==4.2.0
 antlr4-python3-runtime==4.13.2

-torchao==0.7.0
+torchao==0.5.0
 schedulefree==1.3.0
-
-axolotl-contribs-lgpl==0.0.1b2
@@ -32,5 +32,5 @@ else:
     raise RuntimeError(f"Torch = {v} too new!")
 x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
 print(
-    f'pip install unsloth-zoo==2024.12.1 && pip install --no-deps "unsloth[{x}]==2024.12.4"'
+    f'pip install unsloth-zoo==2024.11.7 && pip install --no-deps "unsloth[{x}]==2024.11.9"'
 )
@@ -1,7 +1,3 @@
 """Axolotl - Train and fine-tune large language models"""
-
-import pkgutil
-
-__path__ = pkgutil.extend_path(__path__, __name__)  # Make this a namespace package

 __version__ = "0.6.0"
@@ -1,52 +0,0 @@
-"""
-CLI to run training on a model
-"""
-import logging
-from pathlib import Path
-from typing import Dict, Union
-
-import fire
-from dotenv import load_dotenv
-from transformers.hf_argparser import HfArgumentParser
-
-from axolotl.cli import (
-    check_accelerate_default_config,
-    check_user_token,
-    load_cfg,
-    load_datasets,
-    load_rl_datasets,
-    print_axolotl_text_art,
-)
-from axolotl.common.cli import TrainerCliArgs
-from axolotl.evaluate import evaluate
-
-LOG = logging.getLogger("axolotl.cli.evaluate")
-
-
-def do_evaluate(cfg, cli_args) -> Dict[str, float]:
-    # pylint: disable=duplicate-code
-    print_axolotl_text_art()
-    check_accelerate_default_config()
-    check_user_token()
-
-    if cfg.rl:  # and cfg.rl != "orpo":
-        dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
-    else:
-        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
-
-    return evaluate(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
-
-
-def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
-    # pylint: disable=duplicate-code
-    parsed_cfg = load_cfg(config, **kwargs)
-    parser = HfArgumentParser(TrainerCliArgs)
-    parsed_cli_args, _ = parser.parse_args_into_dataclasses(
-        return_remaining_strings=True
-    )
-    do_evaluate(parsed_cfg, parsed_cli_args)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    fire.Fire(do_cli)
@@ -1,207 +0,0 @@
-"""CLI to convert a transformers model's attns to diff attns."""
-import logging
-import warnings
-from pathlib import Path
-from time import time
-from typing import Union
-
-import fire
-import torch
-import yaml
-from colorama import Fore
-from dotenv import load_dotenv
-from transformers import HfArgumentParser
-
-from axolotl.cli import load_cfg, print_axolotl_text_art
-from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
-from axolotl.integrations.diff_transformer.convert import convert_to_diff_attn
-from axolotl.utils.yaml import dump_yaml_preserved_order
-
-LOG = logging.getLogger(__name__)
-
-
-def test_inference(model, tokenizer, prompt="The quick brown fox"):
-    """Run test inference and return generation time"""
-    try:
-        inputs = tokenizer(prompt, return_tensors="pt")
-        inputs = {
-            k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
-        }
-
-        start = time()
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=20,
-                num_beams=1,
-                do_sample=False,
-                pad_token_id=tokenizer.pad_token_id,
-                use_cache=False,
-            )
-        elapsed = time() - start
-
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        LOG.info("Prompt: %s", prompt)
-        LOG.info("Generated: %s", generated_text)
-        LOG.info("Generation time: %.2fs", elapsed)
-
-        return elapsed, generated_text
-
-    except Exception as exc:
-        LOG.error("Inference failed: %s", str(exc))
-        raise
-
-
-def convert_diff_transformer(cfg, cli_args, config_path):
-    debug_info = {}
-
-    # Load model and tokenizer
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
-    model.to(cfg.device, dtype=cfg.torch_dtype)
-
-    # Log original model info
-    LOG.info(
-        "Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
-        model.config.hidden_size,
-        model.config.num_attention_heads,
-    )
-
-    # Test original model
-    if cli_args.debug:
-        LOG.info("Testing original model...")
-        debug_info["orig_time"], debug_info["orig_text"] = test_inference(
-            model, tokenizer
-        )
-
-    # Convert attention
-    LOG.info("Converting to differential attention...")
-    if cli_args.split_heads and cli_args.zero_init:
-        LOG.warning(
-            Fore.YELLOW
-            + "Warning: Using split_heads with zero_init is not recommended; "
-            + "split_heads will preclude the effects of zero_init"
-            + Fore.RESET
-        )
-    try:
-        model = convert_to_diff_attn(
-            model=model,
-            zero_init=cli_args.zero_init,
-            sublayer_norm=cli_args.sublayer_norm,
-            split_heads=cli_args.split_heads,
-        )
-        model.to(cfg.device, dtype=cfg.torch_dtype)
-    except Exception as exc:
-        LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
-        raise
-
-    # Test converted model
-    if cli_args.debug:
-        LOG.info("Testing converted model...")
-        debug_info["conv_time"], debug_info["conv_text"] = test_inference(
-            model, tokenizer
-        )
-
-    # Save if requested
-    if cfg.output_dir:
-        # Save model and tokenizer
-        LOG.info("Saving converted model to %s", cfg.output_dir)
-        model.save_pretrained(cfg.output_dir)
-        tokenizer.save_pretrained(cfg.output_dir)
-
-        # Modify config to reflect new path / differential attention
-        output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
-        LOG.info("Saving updated config to %s", output_config_path)
-
-        with open(config_path, "r", encoding="utf-8") as file:
-            modified_cfg = yaml.safe_load(file) or {}
-
-        modified_cfg["base_model"] = cfg.output_dir
-        modified_cfg["diff_attention"] = True
-        plugin_class = (
-            "axolotl.integrations.diff_transformer.DifferentialTransformerPlugin"
-        )
-        if "plugins" in modified_cfg:
-            modified_cfg["plugins"].append(plugin_class)
-        else:
-            modified_cfg["plugins"] = [plugin_class]
-
-        dump_yaml_preserved_order(
-            data=modified_cfg,
-            reference_yaml_path=config_path,
-            output_path=output_config_path,
-        )
-    else:
-        LOG.info("Not saving converted model to disk")
-        LOG.info("Pass --output-dir path/to/save to save model")
-
-    if cli_args.debug:
-        LOG.info(
-            Fore.GREEN
-            + "Conversion successful!\n"
-            + f"Original generation time: {debug_info['orig_time']:.2f}s\n"
-            + f"Converted generation time: {debug_info['conv_time']:.2f}s"
-            + Fore.RESET
-        )
-
-        if debug_info["orig_text"] == debug_info["conv_text"]:
-            LOG.info(
-                Fore.GREEN
-                + "Generations match!\n"
-                + "Model generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['orig_text']}\n"
-                + "*" * 50
-                + "\n"
-                + Fore.RESET
-            )
-            debug_info["generations_match"] = True
-        else:
-            message = (
-                "Generations do not match.\n"
-                + "Original generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['orig_text']}\n"
-                + "*" * 50
-                + "\n"
-                + "Converted generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['conv_text']}\n"
-                + "*" * 50
-                + "\n"
-            )
-            debug_info["generations_match"] = False
-
-            if cli_args.zero_init and not cli_args.sublayer_norm:
-                LOG.info(Fore.RED + message + Fore.RESET)
-                debug_info["match_expected"] = True
-            else:
-                LOG.info(
-                    Fore.YELLOW
-                    + message
-                    + "However, this is expected since --zero-init"
-                    + " and --no-sublayer-norm were not passed."
-                    + Fore.RESET
-                )
-                debug_info["match_expected"] = False
-
-    return model, debug_info
-
-
-def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
-    print_axolotl_text_art()
-
-    cfg = load_cfg(config, **kwargs)
-    parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
-    cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-
-    convert_diff_transformer(cfg, cli_args, config)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    fire.Fire(do_cli)
@@ -1,197 +0,0 @@
-"""CLI to convert a transformers model's attns to rala attns."""
-import logging
-import warnings
-from pathlib import Path
-from time import time
-from typing import Union
-
-import fire
-import torch
-import yaml
-from colorama import Fore
-from dotenv import load_dotenv
-from transformers import HfArgumentParser
-
-from axolotl.cli import load_cfg, print_axolotl_text_art
-from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
-from axolotl.integrations.rala.convert import convert_to_rala
-from axolotl.utils.yaml import dump_yaml_preserved_order
-
-LOG = logging.getLogger(__name__)
-
-
-def test_inference(model, tokenizer, prompt="The quick brown fox"):
-    """Run test inference and return generation time"""
-    try:
-        inputs = tokenizer(prompt, return_tensors="pt")
-        inputs = {
-            k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
-        }
-
-        start = time()
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=20,
-                num_beams=1,
-                do_sample=False,
-                pad_token_id=tokenizer.pad_token_id,
-                use_cache=False,
-            )
-        elapsed = time() - start
-
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        LOG.info("Prompt: %s", prompt)
-        LOG.info("Generated: %s", generated_text)
-        LOG.info("Generation time: %.2fs", elapsed)
-
-        return elapsed, generated_text
-
-    except Exception as exc:
-        LOG.error("Inference failed: %s", str(exc))
-        raise
-
-
-def convert_rala(cfg, cli_args, config_path):
-    debug_info = {}
-
-    # Load model and tokenizer
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
-    model.to(cfg.device, dtype=cfg.torch_dtype)
-
-    # Log original model info
-    LOG.info(
-        "Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
-        model.config.hidden_size,
-        model.config.num_attention_heads,
-    )
-
-    # Test original model
-    if cli_args.debug:
-        LOG.info("attention layers to RALA attention")
-        debug_info["orig_time"], debug_info["orig_text"] = test_inference(
-            model, tokenizer
-        )
-
-    # Convert attention
-    try:
-        model = convert_to_rala(
-            model=model,
-            zero_init=cli_args.zero_init,
-        )
-        model.to(cfg.device, dtype=cfg.torch_dtype)
-    except Exception as exc:
-        LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
-        raise
-
-    # Test converted model
-    if cli_args.debug:
-        LOG.info("Testing converted model...")
-        debug_info["conv_time"], debug_info["conv_text"] = test_inference(
-            model, tokenizer
-        )
-
-    # Save if requested
-    if cfg.output_dir:
-        # Save model and tokenizer
-        LOG.info("Saving converted model to %s", cfg.output_dir)
-        model.save_pretrained(cfg.output_dir)
-        tokenizer.save_pretrained(cfg.output_dir)
-
-        # Modify config to reflect new path / differential attention
-        output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
-        LOG.info("Saving updated config to %s", output_config_path)
-
-        with open(config_path, "r", encoding="utf-8") as file:
-            modified_cfg = yaml.safe_load(file) or {}
-
-        modified_cfg["base_model"] = cfg.output_dir
-        modified_cfg["rala_attention"] = True
-        plugin_class = "axolotl.integrations.rala.RalaPlugin"
-        if "plugins" in modified_cfg:
-            modified_cfg["plugins"].append(plugin_class)
-        else:
-            modified_cfg["plugins"] = [plugin_class]
-
-        dump_yaml_preserved_order(
-            data=modified_cfg,
-            reference_yaml_path=config_path,
-            output_path=output_config_path,
-        )
-    else:
-        LOG.info("Not saving converted model to disk")
-        LOG.info("Pass --output-dir path/to/save to save model")
-
-    if cli_args.debug:
-        LOG.info(
-            Fore.GREEN
-            + "Conversion successful!\n"
-            + f"Original generation time: {debug_info['orig_time']:.2f}s\n"
-            + f"Converted generation time: {debug_info['conv_time']:.2f}s"
-            + Fore.RESET
-        )
-
-        if debug_info["orig_text"] == debug_info["conv_text"]:
-            LOG.info(
-                Fore.GREEN
-                + "Generations match!\n"
-                + "Model generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['orig_text']}\n"
-                + "*" * 50
-                + "\n"
-                + Fore.RESET
-            )
-            debug_info["generations_match"] = True
-        else:
-            message = (
-                "Generations do not match.\n"
-                + "Original generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['orig_text']}\n"
-                + "*" * 50
-                + "\n"
-                + "Converted generation:\n"
-                + "*" * 50
-                + "\n"
-                + f"{debug_info['conv_text']}\n"
-                + "*" * 50
-                + "\n"
-            )
-            debug_info["generations_match"] = False
-
-            if cli_args.zero_init and not cli_args.sublayer_norm:
-                LOG.info(Fore.RED + message + Fore.RESET)
-                debug_info["match_expected"] = True
-            else:
-                LOG.info(
-                    Fore.YELLOW
-                    + message
-                    + "However, this is expected since --zero-init"
-                    + " and --no-sublayer-norm were not passed."
-                    + Fore.RESET
-                )
-                debug_info["match_expected"] = False
-
-    return model, debug_info
-
-
-def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
-    print_axolotl_text_art()
-
-    cfg = load_cfg(config, **kwargs)
-    if cfg.rala_attention:
-        cfg.rala_attention = False
-    parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
-    cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-
-    convert_rala(cfg, cli_args, config)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    fire.Fire(do_cli)
@@ -12,13 +12,7 @@ from axolotl.cli.utils import (
     build_command,
     fetch_from_github,
 )
-from axolotl.common.cli import (
-    ConvertDiffTransformerCliArgs,
-    EvaluateCliArgs,
-    PreprocessCliArgs,
-    TrainerCliArgs,
-)
-from axolotl.utils import set_pytorch_cuda_alloc_conf
+from axolotl.common.cli import PreprocessCliArgs, TrainerCliArgs
 from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig


@@ -54,9 +48,6 @@ def train(config: str, accelerate: bool, **kwargs):
     """Train or fine-tune a model."""
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
-
-    # Enable expandable segments for cuda allocation to improve VRAM usage
-    set_pytorch_cuda_alloc_conf()

     if accelerate:
         base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.train"]
         if config:
@@ -69,34 +60,6 @@ def train(config: str, accelerate: bool, **kwargs):
         do_cli(config=config, **kwargs)


-@cli.command()
-@click.argument("config", type=click.Path(exists=True, path_type=str))
-@click.option(
-    "--accelerate/--no-accelerate",
-    default=True,
-    help="Use accelerate launch for multi-GPU training",
-)
-@add_options_from_dataclass(EvaluateCliArgs)
-@add_options_from_config(AxolotlInputConfig)
-def evaluate(config: str, accelerate: bool, **kwargs):
-    """Evaluate a model."""
-    kwargs = {k: v for k, v in kwargs.items() if v is not None}
-
-    # Enable expandable segments for cuda allocation to improve VRAM usage
-    set_pytorch_cuda_alloc_conf()
-
-    if accelerate:
-        base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.evaluate"]
-        if config:
-            base_cmd.append(config)
-        cmd = build_command(base_cmd, kwargs)
-        subprocess.run(cmd, check=True)  # nosec B603
-    else:
-        from axolotl.cli.evaluate import do_cli
-
-        do_cli(config=config, **kwargs)
-
-
 @cli.command()
 @click.argument("config", type=click.Path(exists=True, path_type=str))
 @click.option(
@@ -248,32 +211,6 @@ def merge_lora(
|
|||||||
do_cli(config=config, **kwargs)
|
do_cli(config=config, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
|
||||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
|
||||||
@add_options_from_dataclass(ConvertDiffTransformerCliArgs)
|
|
||||||
@add_options_from_config(AxolotlInputConfig)
|
|
||||||
def convert_diff_transformer(config: str, **kwargs):
|
|
||||||
"""Convert model attention layers to differential attention layers."""
|
|
||||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
|
||||||
|
|
||||||
from axolotl.cli.integrations.convert_diff_transformer import do_cli
|
|
||||||
|
|
||||||
do_cli(config=config, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
|
||||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
|
||||||
@add_options_from_dataclass(ConvertDiffTransformerCliArgs)
|
|
||||||
@add_options_from_config(AxolotlInputConfig)
|
|
||||||
def convert_rala(config: str, **kwargs):
|
|
||||||
"""Convert model attention layers to RALA attention layers."""
|
|
||||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
|
||||||
|
|
||||||
from axolotl.cli.integrations.convert_rala import do_cli
|
|
||||||
|
|
||||||
do_cli(config=config, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
|
@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
|
||||||
@click.option("--dest", help="Destination directory")
|
@click.option("--dest", help="Destination directory")
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ def add_options_from_dataclass(config_class: Type[Any]):
         # Process dataclass fields in reverse order for correct option ordering
         for field in reversed(dataclasses.fields(config_class)):
             field_type = field.type
+
             if get_origin(field_type) is Union and type(None) in get_args(field_type):
                 field_type = next(
                     t for t in get_args(field_type) if not isinstance(t, NoneType)
@@ -43,7 +44,6 @@ def add_options_from_dataclass(config_class: Type[Any]):
                     default=field.default,
                     help=field.metadata.get("description"),
                 )(function)
-
         return function

     return decorator
@@ -55,14 +55,7 @@ def add_options_from_config(config_class: Type[BaseModel]):
     def decorator(function):
         # Process model fields in reverse order for correct option ordering
         for name, field in reversed(config_class.model_fields.items()):
-            field_type = field.annotation
-            if get_origin(field_type) is Union and type(None) in get_args(field_type):
-                field_type = next(
-                    t for t in get_args(field_type) if not isinstance(t, NoneType)
-                )
-
-            # NOTE: defaults are handled by the pydantic model config classes.
-            if field_type == bool:
+            if field.annotation == bool:
                 field_name = name.replace("_", "-")
                 option_name = f"--{field_name}/--no-{field_name}"
                 function = click.option(
@@ -73,7 +66,6 @@ def add_options_from_config(config_class: Type[BaseModel]):
             function = click.option(
                 option_name, default=None, help=field.description
             )(function)
-
         return function

     return decorator
Some files were not shown because too many files have changed in this diff