Compare commits
39 Commits
base-model
...
rala
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d3f4b9ab5 | ||
|
|
12aade921a | ||
|
|
198f01f902 | ||
|
|
2e6265090f | ||
|
|
1c5b78621c | ||
|
|
2717b97103 | ||
|
|
e0adf11b76 | ||
|
|
544f2a8a27 | ||
|
|
d4e29e5b67 | ||
|
|
80ba0d8dd1 | ||
|
|
dda9b25994 | ||
|
|
0e9c0c6680 | ||
|
|
b7cc117394 | ||
|
|
1fadc5cfe5 | ||
|
|
6425d052bc | ||
|
|
594c42f169 | ||
|
|
ae494776e4 | ||
|
|
503c4e9ffa | ||
|
|
845dbede53 | ||
|
|
7108ca72b4 | ||
|
|
af1d8d69af | ||
|
|
e162d36fe9 | ||
|
|
7af20b52d6 | ||
|
|
866d7b3040 | ||
|
|
23ac14540b | ||
|
|
42bd32a233 | ||
|
|
5b8fb5e939 | ||
|
|
bd2a594b89 | ||
|
|
3798229d85 | ||
|
|
10cfecf02e | ||
|
|
339f3c67e2 | ||
|
|
d91feaffc8 | ||
|
|
e246ceffa4 | ||
|
|
8ddc18ec8d | ||
|
|
1c14c4a15c | ||
|
|
1f623e6cc8 | ||
|
|
f865464ae5 | ||
|
|
33090486d7 | ||
|
|
effc4dc409 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,7 @@
|
||||
**/axolotl.egg-info
|
||||
configs
|
||||
last_run_prepared/
|
||||
outputs
|
||||
.vscode
|
||||
_site/
|
||||
|
||||
@@ -185,3 +186,6 @@ out/
|
||||
|
||||
# vim
|
||||
*.swp
|
||||
|
||||
# symlinked to axolotl-artifacts in docker containers
|
||||
outputs
|
||||
|
||||
@@ -4,7 +4,6 @@ set -e
|
||||
python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
|
||||
|
||||
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
|
||||
# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
|
||||
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
|
||||
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
|
||||
pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
|
||||
pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
|
||||
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
modal application to run axolotl gpu tests in Modal
|
||||
"""
|
||||
modal application to run axolotl gpu tests in Modal
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
import os
|
||||
|
||||
@@ -127,34 +127,40 @@ datasets:
|
||||
# - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
|
||||
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
|
||||
chat_template: tokenizer_default
|
||||
# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
|
||||
|
||||
# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
|
||||
chat_template_jinja:
|
||||
# The key in the data example that contains the messages. Default is "messages".
|
||||
|
||||
# Key containing the messages (default: "messages")
|
||||
field_messages: messages
|
||||
# The key in the message turn that contains the role. Default is "role".
|
||||
# Key for role in each message (default: "role")
|
||||
message_field_role: role
|
||||
# The key in the message turn that contains the content. Default is "content".
|
||||
# Key for content in each message (default: "content")
|
||||
message_field_content: content
|
||||
# Optional[Dict[str, List]]. Roles mapping for the messages.
|
||||
|
||||
# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
|
||||
roles:
|
||||
user: ["human", "user"]
|
||||
assistant: ["gpt", "assistant", "ai"]
|
||||
assistant: ["gpt", "assistant"]
|
||||
system: ["system"]
|
||||
tool: ["tool"]
|
||||
|
||||
## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
|
||||
# IMPORTANT: The following fields determine which parts of the conversation to train on.
|
||||
# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
|
||||
# See examples at `docs/dataset-formats/conversation.qmd`
|
||||
# Note: If the below 4 fields are empty, defaults to training only on the last message.
|
||||
|
||||
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
|
||||
roles_to_train: ["gpt", "assistant"]
|
||||
roles_to_train: ["assistant"] # default
|
||||
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
|
||||
# - all: train on all EOS tokens
|
||||
# - turn: train on the EOS token at the end of each trainable turn
|
||||
# - turn (default): train on the EOS token at the end of each trainable turn
|
||||
# - last: train on the last EOS token in the conversation
|
||||
train_on_eos: last
|
||||
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
|
||||
message_field_training: training
|
||||
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
|
||||
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
|
||||
# See example at `docs/dataset-formats/conversation.qmd`
|
||||
message_field_training_detail: train_detail
|
||||
|
||||
|
||||
@@ -239,6 +245,9 @@ sample_packing_group_size: 100000
|
||||
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
|
||||
sample_packing_bin_size: 200
|
||||
|
||||
# Use batch flattening for speedups when not using sample_packing
|
||||
batch_flattening:
|
||||
|
||||
# Passed through to transformers when loading the model when launched without accelerate
|
||||
# Use `sequential` when training w/ model parallelism to limit memory
|
||||
device_map:
|
||||
@@ -331,7 +340,8 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
|
||||
output_dir: ./completed-model
|
||||
|
||||
# Whether to use torch.compile and which backend to use
|
||||
torch_compile: # bool
|
||||
# setting to `auto` will enable torch compile when torch>=2.5.1
|
||||
torch_compile: # Optional[Union[Literal["auto"], bool]]
|
||||
torch_compile_backend: # Optional[str]
|
||||
|
||||
# Training hyperparameters
|
||||
@@ -363,6 +373,10 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
|
||||
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
|
||||
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
|
||||
|
||||
profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
|
||||
# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
|
||||
# snapshots can be visualized @ https://pytorch.org/memory_viz
|
||||
|
||||
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
|
||||
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
|
||||
|
||||
|
||||
@@ -68,6 +68,8 @@ We recommend checking the below examples for other usecases.
|
||||
datasets:
|
||||
- path: ...
|
||||
type: chat_template
|
||||
roles_to_train:
|
||||
train_on_eos:
|
||||
```
|
||||
|
||||
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
|
||||
@@ -77,7 +79,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
|
||||
datasets:
|
||||
- path: ...
|
||||
type: chat_template
|
||||
roles_to_train: ["assistant"]
|
||||
roles_to_train: ["assistant"] # default value
|
||||
```
|
||||
|
||||
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
|
||||
@@ -87,7 +89,6 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
|
||||
datasets:
|
||||
- path: ...
|
||||
type: chat_template
|
||||
roles_to_train: ["assistant"]
|
||||
```
|
||||
|
||||
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
|
||||
@@ -99,7 +100,6 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
|
||||
datasets:
|
||||
- path: ...
|
||||
type: chat_template
|
||||
roles_to_train: ["assistant"]
|
||||
```
|
||||
|
||||
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: cerebras/btlm-3b-8k-base
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: GPT2Tokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
tokenizer_use_fast: true
|
||||
tokenizer_legacy: true
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: cerebras/Cerebras-GPT-1.3B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-13b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-13b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-34b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-34b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: codellama/CodeLlama-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: CodeLlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: LnL-AI/dbrx-base-converted-v2
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: LnL-AI/dbrx-base-converted-v2
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: true
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: LnL-AI/dbrx-base-converted-v2
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: deepseek-ai/DeepSeek-V2-Lite
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
base_model: tiiuae/falcon-7b
|
||||
trust_remote_code: true
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
# 1b: tiiuae/falcon-rw-1b
|
||||
# 40b: tiiuae/falcon-40b
|
||||
base_model: tiiuae/falcon-7b
|
||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
||||
trust_remote_code: true
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
||||
trust_remote_code: true
|
||||
|
||||
|
||||
load_in_8bit: false
|
||||
# enable 4bit for QLoRA
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
base_model: tiiuae/falcon-7b
|
||||
trust_remote_code: true
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
# use google/gemma-7b if you have access
|
||||
base_model: mhenrichsen/gemma-7b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: google/gemma-2-9b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: google/gemma-2-2b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForSequenceClassification
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: EleutherAI/gpt-j-6b
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: ai21labs/Jamba-v0.1
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: ai21labs/Jamba-v0.1
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
base_model: ai21labs/AI21-Jamba-1.5-Large
|
||||
# optionally might have model_type or tokenizer_type
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: huggyllama/llama-7b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
datasets:
|
||||
- path: openaccess-ai-collective/jeopardy
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
base_model: TheBloke/Llama-2-7B-GPTQ
|
||||
gptq: true
|
||||
gptq_disable_exllama: true
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
gptq: true
|
||||
gptq_disable_exllama: true
|
||||
|
||||
tokenizer_use_fast: true
|
||||
tokenizer_legacy: true
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Llama-2-7b-hf
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
|
||||
# optionally might have model_type or tokenizer_type or processor_type
|
||||
processor_type: AutoProcessor
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
strict: false
|
||||
|
||||
# these 3 lines are needed for now to handle vision chat templates w images
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
plugins:
|
||||
- axolotl.integrations.liger.LigerPlugin
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Meta-Llama-3-8B-Instruct
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: meta-llama/Llama-3.2-1B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: meta-llama/Llama-3.2-1B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: NousResearch/Llama-3.2-1B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Meta-Llama-3-8B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: meta-llama/Llama-3.2-1B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: NousResearch/Llama-3.2-1B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
|
||||
# optionally might have model_type or tokenizer_type
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: casperhansen/llama-3-70b-fp16
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: NousResearch/Meta-Llama-3-8B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
base_model: state-spaces/mamba-2.8b
|
||||
# optionally might have model_type or tokenizer_type or tokenizer_config
|
||||
model_type: MambaLMHeadModel
|
||||
tokenizer_type: AutoTokenizer
|
||||
tokenizer_config: EleutherAI/gpt-neox-20b
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -4,8 +4,11 @@
|
||||
#face problems with the special tokens.
|
||||
|
||||
base_model: mistralai/Mistral-7B-Instruct-v0.2
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: mistralai/Mistral-7B-v0.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: MistralForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
base_model: mosaicml/mpt-7b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
|
||||
load_in_8bit: false
|
||||
datasets:
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: openlm-research/open_llama_3b_v2
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: openlm-research/open_llama_3b_v2
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: openlm-research/open_llama_3b_v2
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
strict: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: microsoft/Phi-3.5-mini-instruct
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: microsoft/phi-1_5
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: microsoft/phi-1_5
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: microsoft/phi-2
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: microsoft/Phi-3-mini-4k-instruct
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
base_model: microsoft/Phi-3-mini-4k-instruct
|
||||
# optionally might have model_type or tokenizer_type
|
||||
trust_remote_code: true
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
chat_template: phi_3
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
base_model: EleutherAI/pythia-12b-deduped
|
||||
base_model_ignore_patterns: pytorch* # prefer safetensors
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: GPTNeoXForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
gptq: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: EleutherAI/pythia-1.4b-deduped
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
datasets:
|
||||
- path: teknium/GPT4-LLM-Cleaned
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: Qwen/Qwen-7B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: Qwen/Qwen-7B
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: Qwen/Qwen2.5-0.5B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
strict: false
|
||||
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: Qwen/Qwen2-7B
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: GPTNeoXForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code:
|
||||
load_in_8bit: false
|
||||
datasets:
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
base_model: replit/replit-code-v1-3b
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
load_in_8bit: false
|
||||
datasets:
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: stabilityai/stablelm-2-1_6b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
base_model: stabilityai/stablelm-2-1_6b
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: true
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
base_model: bigcode/starcoder2-3b
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: TinyLlama/TinyLlama_v1.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
base_model: TinyLlama/TinyLlama_v1.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: true
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
||||
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: false
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: TinyLlama/TinyLlama_v1.1
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
# An example of finetuning Salesforce's XGen-7b model with 8k context using qlora
|
||||
# on Tim Dettmers' Guanaco dataset.
|
||||
base_model: Salesforce/xgen-7b-8k-base
|
||||
trust_remote_code: true
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: AutoModelForCausalLM
|
||||
tokenizer_type: AutoTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
trust_remote_code: true
|
||||
|
||||
load_in_8bit: false
|
||||
# enable 4bit for QLoRA
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
base_model: 01-ai/Yi-34B-Chat
|
||||
# optionally might have model_type or tokenizer_type
|
||||
model_type: LlamaForCausalLM
|
||||
tokenizer_type: LlamaTokenizer
|
||||
# Automatically upload checkpoint and final model to HF
|
||||
# hub_model_id: username/custom_model_name
|
||||
|
||||
load_in_8bit: false
|
||||
load_in_4bit: true
|
||||
|
||||
@@ -7,26 +7,31 @@ mamba-ssm==1.2.0.post1
|
||||
flash-attn==2.7.0.post2
|
||||
xformers>=0.0.23.post1
|
||||
autoawq==0.2.7.post3
|
||||
liger-kernel==0.4.2
|
||||
liger-kernel==0.5.2
|
||||
# END section
|
||||
|
||||
packaging==23.2
|
||||
|
||||
peft==0.14.0
|
||||
transformers>=4.46.3
|
||||
transformers==4.47.1
|
||||
tokenizers>=0.20.1
|
||||
accelerate==1.2.0
|
||||
accelerate==1.2.1
|
||||
datasets==3.1.0
|
||||
deepspeed==0.16.1
|
||||
trl==0.12.1
|
||||
|
||||
optimum==1.16.2
|
||||
hf_transfer
|
||||
sentencepiece
|
||||
gradio==3.50.2
|
||||
|
||||
pydantic==2.6.3
|
||||
addict
|
||||
fire
|
||||
PyYAML>=6.0
|
||||
requests
|
||||
sentencepiece
|
||||
wandb
|
||||
einops
|
||||
optimum==1.16.2
|
||||
hf_transfer
|
||||
colorama
|
||||
numba
|
||||
numpy>=1.24.4,<=2.0.1
|
||||
@@ -36,7 +41,6 @@ scipy
|
||||
scikit-learn==1.4.2
|
||||
nvidia-ml-py==12.560.30
|
||||
art
|
||||
gradio==3.50.2
|
||||
tensorboard
|
||||
python-dotenv==1.0.1
|
||||
|
||||
@@ -45,7 +49,6 @@ s3fs>=2024.5.0
|
||||
gcsfs>=2024.5.0
|
||||
# adlfs
|
||||
|
||||
trl==0.12.1
|
||||
zstandard==0.22.0
|
||||
fastcore
|
||||
|
||||
@@ -55,5 +58,7 @@ langdetect==1.0.9
|
||||
immutabledict==4.2.0
|
||||
antlr4-python3-runtime==4.13.2
|
||||
|
||||
torchao==0.5.0
|
||||
torchao==0.7.0
|
||||
schedulefree==1.3.0
|
||||
|
||||
axolotl-contribs-lgpl==0.0.1b2
|
||||
|
||||
@@ -32,5 +32,5 @@ else:
|
||||
raise RuntimeError(f"Torch = {v} too new!")
|
||||
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
|
||||
print(
|
||||
f'pip install unsloth-zoo==2024.11.7 && pip install --no-deps "unsloth[{x}]==2024.11.9"'
|
||||
f'pip install unsloth-zoo==2024.12.1 && pip install --no-deps "unsloth[{x}]==2024.12.4"'
|
||||
)
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""Axolotl - Train and fine-tune large language models"""
|
||||
|
||||
import pkgutil
|
||||
|
||||
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
|
||||
|
||||
__version__ = "0.6.0"
|
||||
|
||||
52
src/axolotl/cli/evaluate.py
Normal file
52
src/axolotl/cli/evaluate.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""
|
||||
CLI to run evaluation on a model
|
||||
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
import fire
|
||||
from dotenv import load_dotenv
|
||||
from transformers.hf_argparser import HfArgumentParser
|
||||
|
||||
from axolotl.cli import (
|
||||
check_accelerate_default_config,
|
||||
check_user_token,
|
||||
load_cfg,
|
||||
load_datasets,
|
||||
load_rl_datasets,
|
||||
print_axolotl_text_art,
|
||||
)
|
||||
from axolotl.common.cli import TrainerCliArgs
|
||||
from axolotl.evaluate import evaluate
|
||||
|
||||
LOG = logging.getLogger("axolotl.cli.evaluate")
|
||||
|
||||
|
||||
def do_evaluate(cfg, cli_args) -> Dict[str, float]:
|
||||
# pylint: disable=duplicate-code
|
||||
print_axolotl_text_art()
|
||||
check_accelerate_default_config()
|
||||
check_user_token()
|
||||
|
||||
if cfg.rl: # and cfg.rl != "orpo":
|
||||
dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
|
||||
else:
|
||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||
|
||||
return evaluate(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
|
||||
|
||||
|
||||
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
|
||||
# pylint: disable=duplicate-code
|
||||
parsed_cfg = load_cfg(config, **kwargs)
|
||||
parser = HfArgumentParser(TrainerCliArgs)
|
||||
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
|
||||
return_remaining_strings=True
|
||||
)
|
||||
do_evaluate(parsed_cfg, parsed_cli_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
load_dotenv()
|
||||
fire.Fire(do_cli)
|
||||
0
src/axolotl/cli/integrations/__init__.py
Normal file
0
src/axolotl/cli/integrations/__init__.py
Normal file
207
src/axolotl/cli/integrations/convert_diff_transformer.py
Normal file
207
src/axolotl/cli/integrations/convert_diff_transformer.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""CLI to convert a transformers model's attns to diff attns."""
|
||||
import logging
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from time import time
|
||||
from typing import Union
|
||||
|
||||
import fire
|
||||
import torch
|
||||
import yaml
|
||||
from colorama import Fore
|
||||
from dotenv import load_dotenv
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
from axolotl.cli import load_cfg, print_axolotl_text_art
|
||||
from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
|
||||
from axolotl.integrations.diff_transformer.convert import convert_to_diff_attn
|
||||
from axolotl.utils.yaml import dump_yaml_preserved_order
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def test_inference(model, tokenizer, prompt="The quick brown fox"):
|
||||
"""Run test inference and return the generation time and the generated text"""
|
||||
try:
|
||||
inputs = tokenizer(prompt, return_tensors="pt")
|
||||
inputs = {
|
||||
k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
|
||||
}
|
||||
|
||||
start = time()
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(
|
||||
**inputs,
|
||||
max_new_tokens=20,
|
||||
num_beams=1,
|
||||
do_sample=False,
|
||||
pad_token_id=tokenizer.pad_token_id,
|
||||
use_cache=False,
|
||||
)
|
||||
elapsed = time() - start
|
||||
|
||||
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||
LOG.info("Prompt: %s", prompt)
|
||||
LOG.info("Generated: %s", generated_text)
|
||||
LOG.info("Generation time: %.2fs", elapsed)
|
||||
|
||||
return elapsed, generated_text
|
||||
|
||||
except Exception as exc:
|
||||
LOG.error("Inference failed: %s", str(exc))
|
||||
raise
|
||||
|
||||
|
||||
def convert_diff_transformer(cfg, cli_args, config_path):
|
||||
debug_info = {}
|
||||
|
||||
# Load model and tokenizer
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
|
||||
model.to(cfg.device, dtype=cfg.torch_dtype)
|
||||
|
||||
# Log original model info
|
||||
LOG.info(
|
||||
"Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
|
||||
model.config.hidden_size,
|
||||
model.config.num_attention_heads,
|
||||
)
|
||||
|
||||
# Test original model
|
||||
if cli_args.debug:
|
||||
LOG.info("Testing original model...")
|
||||
debug_info["orig_time"], debug_info["orig_text"] = test_inference(
|
||||
model, tokenizer
|
||||
)
|
||||
|
||||
# Convert attention
|
||||
LOG.info("Converting to differential attention...")
|
||||
if cli_args.split_heads and cli_args.zero_init:
|
||||
LOG.warning(
|
||||
Fore.YELLOW
|
||||
+ "Warning: Using split_heads with zero_init is not recommended; "
|
||||
+ "split_heads will preclude the effects of zero_init"
|
||||
+ Fore.RESET
|
||||
)
|
||||
try:
|
||||
model = convert_to_diff_attn(
|
||||
model=model,
|
||||
zero_init=cli_args.zero_init,
|
||||
sublayer_norm=cli_args.sublayer_norm,
|
||||
split_heads=cli_args.split_heads,
|
||||
)
|
||||
model.to(cfg.device, dtype=cfg.torch_dtype)
|
||||
except Exception as exc:
|
||||
LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
|
||||
raise
|
||||
|
||||
# Test converted model
|
||||
if cli_args.debug:
|
||||
LOG.info("Testing converted model...")
|
||||
debug_info["conv_time"], debug_info["conv_text"] = test_inference(
|
||||
model, tokenizer
|
||||
)
|
||||
|
||||
# Save if requested
|
||||
if cfg.output_dir:
|
||||
# Save model and tokenizer
|
||||
LOG.info("Saving converted model to %s", cfg.output_dir)
|
||||
model.save_pretrained(cfg.output_dir)
|
||||
tokenizer.save_pretrained(cfg.output_dir)
|
||||
|
||||
# Modify config to reflect new path / differential attention
|
||||
output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
|
||||
LOG.info("Saving updated config to %s", output_config_path)
|
||||
|
||||
with open(config_path, "r", encoding="utf-8") as file:
|
||||
modified_cfg = yaml.safe_load(file) or {}
|
||||
|
||||
modified_cfg["base_model"] = cfg.output_dir
|
||||
modified_cfg["diff_attention"] = True
|
||||
plugin_class = (
|
||||
"axolotl.integrations.diff_transformer.DifferentialTransformerPlugin"
|
||||
)
|
||||
if "plugins" in modified_cfg:
|
||||
modified_cfg["plugins"].append(plugin_class)
|
||||
else:
|
||||
modified_cfg["plugins"] = [plugin_class]
|
||||
|
||||
dump_yaml_preserved_order(
|
||||
data=modified_cfg,
|
||||
reference_yaml_path=config_path,
|
||||
output_path=output_config_path,
|
||||
)
|
||||
else:
|
||||
LOG.info("Not saving converted model to disk")
|
||||
LOG.info("Pass --output-dir path/to/save to save model")
|
||||
|
||||
if cli_args.debug:
|
||||
LOG.info(
|
||||
Fore.GREEN
|
||||
+ "Conversion successful!\n"
|
||||
+ f"Original generation time: {debug_info['orig_time']:.2f}s\n"
|
||||
+ f"Converted generation time: {debug_info['conv_time']:.2f}s"
|
||||
+ Fore.RESET
|
||||
)
|
||||
|
||||
if debug_info["orig_text"] == debug_info["conv_text"]:
|
||||
LOG.info(
|
||||
Fore.GREEN
|
||||
+ "Generations match!\n"
|
||||
+ "Model generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['orig_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ Fore.RESET
|
||||
)
|
||||
debug_info["generations_match"] = True
|
||||
else:
|
||||
message = (
|
||||
"Generations do not match.\n"
|
||||
+ "Original generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['orig_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ "Converted generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['conv_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
)
|
||||
debug_info["generations_match"] = False
|
||||
|
||||
if cli_args.zero_init and not cli_args.sublayer_norm:
|
||||
LOG.info(Fore.RED + message + Fore.RESET)
|
||||
debug_info["match_expected"] = True
|
||||
else:
|
||||
LOG.info(
|
||||
Fore.YELLOW
|
||||
+ message
|
||||
+ "However, this is expected since --zero-init"
|
||||
+ " and --no-sublayer-norm were not passed."
|
||||
+ Fore.RESET
|
||||
)
|
||||
debug_info["match_expected"] = False
|
||||
|
||||
return model, debug_info
|
||||
|
||||
|
||||
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
|
||||
print_axolotl_text_art()
|
||||
|
||||
cfg = load_cfg(config, **kwargs)
|
||||
parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
|
||||
cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
|
||||
|
||||
convert_diff_transformer(cfg, cli_args, config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
load_dotenv()
|
||||
fire.Fire(do_cli)
|
||||
197
src/axolotl/cli/integrations/convert_rala.py
Normal file
197
src/axolotl/cli/integrations/convert_rala.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""CLI to convert a transformers model's attns to rala attns."""
|
||||
import logging
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from time import time
|
||||
from typing import Union
|
||||
|
||||
import fire
|
||||
import torch
|
||||
import yaml
|
||||
from colorama import Fore
|
||||
from dotenv import load_dotenv
|
||||
from transformers import HfArgumentParser
|
||||
|
||||
from axolotl.cli import load_cfg, print_axolotl_text_art
|
||||
from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
|
||||
from axolotl.integrations.rala.convert import convert_to_rala
|
||||
from axolotl.utils.yaml import dump_yaml_preserved_order
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def test_inference(model, tokenizer, prompt="The quick brown fox"):
|
||||
"""Run test inference and return the generation time and the generated text"""
|
||||
try:
|
||||
inputs = tokenizer(prompt, return_tensors="pt")
|
||||
inputs = {
|
||||
k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
|
||||
}
|
||||
|
||||
start = time()
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(
|
||||
**inputs,
|
||||
max_new_tokens=20,
|
||||
num_beams=1,
|
||||
do_sample=False,
|
||||
pad_token_id=tokenizer.pad_token_id,
|
||||
use_cache=False,
|
||||
)
|
||||
elapsed = time() - start
|
||||
|
||||
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||
LOG.info("Prompt: %s", prompt)
|
||||
LOG.info("Generated: %s", generated_text)
|
||||
LOG.info("Generation time: %.2fs", elapsed)
|
||||
|
||||
return elapsed, generated_text
|
||||
|
||||
except Exception as exc:
|
||||
LOG.error("Inference failed: %s", str(exc))
|
||||
raise
|
||||
|
||||
|
||||
def convert_rala(cfg, cli_args, config_path):
|
||||
debug_info = {}
|
||||
|
||||
# Load model and tokenizer
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
|
||||
model.to(cfg.device, dtype=cfg.torch_dtype)
|
||||
|
||||
# Log original model info
|
||||
LOG.info(
|
||||
"Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
|
||||
model.config.hidden_size,
|
||||
model.config.num_attention_heads,
|
||||
)
|
||||
|
||||
# Test original model
|
||||
if cli_args.debug:
|
||||
LOG.info("attention layers to RALA attention")
|
||||
debug_info["orig_time"], debug_info["orig_text"] = test_inference(
|
||||
model, tokenizer
|
||||
)
|
||||
|
||||
# Convert attention
|
||||
try:
|
||||
model = convert_to_rala(
|
||||
model=model,
|
||||
zero_init=cli_args.zero_init,
|
||||
)
|
||||
model.to(cfg.device, dtype=cfg.torch_dtype)
|
||||
except Exception as exc:
|
||||
LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
|
||||
raise
|
||||
|
||||
# Test converted model
|
||||
if cli_args.debug:
|
||||
LOG.info("Testing converted model...")
|
||||
debug_info["conv_time"], debug_info["conv_text"] = test_inference(
|
||||
model, tokenizer
|
||||
)
|
||||
|
||||
# Save if requested
|
||||
if cfg.output_dir:
|
||||
# Save model and tokenizer
|
||||
LOG.info("Saving converted model to %s", cfg.output_dir)
|
||||
model.save_pretrained(cfg.output_dir)
|
||||
tokenizer.save_pretrained(cfg.output_dir)
|
||||
|
||||
# Modify config to reflect new path / RALA attention
|
||||
output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
|
||||
LOG.info("Saving updated config to %s", output_config_path)
|
||||
|
||||
with open(config_path, "r", encoding="utf-8") as file:
|
||||
modified_cfg = yaml.safe_load(file) or {}
|
||||
|
||||
modified_cfg["base_model"] = cfg.output_dir
|
||||
modified_cfg["rala_attention"] = True
|
||||
plugin_class = "axolotl.integrations.rala.RalaPlugin"
|
||||
if "plugins" in modified_cfg:
|
||||
modified_cfg["plugins"].append(plugin_class)
|
||||
else:
|
||||
modified_cfg["plugins"] = [plugin_class]
|
||||
|
||||
dump_yaml_preserved_order(
|
||||
data=modified_cfg,
|
||||
reference_yaml_path=config_path,
|
||||
output_path=output_config_path,
|
||||
)
|
||||
else:
|
||||
LOG.info("Not saving converted model to disk")
|
||||
LOG.info("Pass --output-dir path/to/save to save model")
|
||||
|
||||
if cli_args.debug:
|
||||
LOG.info(
|
||||
Fore.GREEN
|
||||
+ "Conversion successful!\n"
|
||||
+ f"Original generation time: {debug_info['orig_time']:.2f}s\n"
|
||||
+ f"Converted generation time: {debug_info['conv_time']:.2f}s"
|
||||
+ Fore.RESET
|
||||
)
|
||||
|
||||
if debug_info["orig_text"] == debug_info["conv_text"]:
|
||||
LOG.info(
|
||||
Fore.GREEN
|
||||
+ "Generations match!\n"
|
||||
+ "Model generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['orig_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ Fore.RESET
|
||||
)
|
||||
debug_info["generations_match"] = True
|
||||
else:
|
||||
message = (
|
||||
"Generations do not match.\n"
|
||||
+ "Original generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['orig_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ "Converted generation:\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
+ f"{debug_info['conv_text']}\n"
|
||||
+ "*" * 50
|
||||
+ "\n"
|
||||
)
|
||||
debug_info["generations_match"] = False
|
||||
|
||||
if cli_args.zero_init and not cli_args.sublayer_norm:
|
||||
LOG.info(Fore.RED + message + Fore.RESET)
|
||||
debug_info["match_expected"] = True
|
||||
else:
|
||||
LOG.info(
|
||||
Fore.YELLOW
|
||||
+ message
|
||||
+ "However, this is expected since --zero-init"
|
||||
+ " and --no-sublayer-norm were not passed."
|
||||
+ Fore.RESET
|
||||
)
|
||||
debug_info["match_expected"] = False
|
||||
|
||||
return model, debug_info
|
||||
|
||||
|
||||
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
|
||||
print_axolotl_text_art()
|
||||
|
||||
cfg = load_cfg(config, **kwargs)
|
||||
if cfg.rala_attention:
|
||||
cfg.rala_attention = False
|
||||
parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
|
||||
cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
|
||||
|
||||
convert_rala(cfg, cli_args, config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
load_dotenv()
|
||||
fire.Fire(do_cli)
|
||||
@@ -12,7 +12,13 @@ from axolotl.cli.utils import (
|
||||
build_command,
|
||||
fetch_from_github,
|
||||
)
|
||||
from axolotl.common.cli import PreprocessCliArgs, TrainerCliArgs
|
||||
from axolotl.common.cli import (
|
||||
ConvertDiffTransformerCliArgs,
|
||||
EvaluateCliArgs,
|
||||
PreprocessCliArgs,
|
||||
TrainerCliArgs,
|
||||
)
|
||||
from axolotl.utils import set_pytorch_cuda_alloc_conf
|
||||
from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig
|
||||
|
||||
|
||||
@@ -48,6 +54,9 @@ def train(config: str, accelerate: bool, **kwargs):
|
||||
"""Train or fine-tune a model."""
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
|
||||
# Enable expandable segments for cuda allocation to improve VRAM usage
|
||||
set_pytorch_cuda_alloc_conf()
|
||||
|
||||
if accelerate:
|
||||
base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.train"]
|
||||
if config:
|
||||
@@ -60,6 +69,34 @@ def train(config: str, accelerate: bool, **kwargs):
|
||||
do_cli(config=config, **kwargs)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
||||
@click.option(
|
||||
"--accelerate/--no-accelerate",
|
||||
default=True,
|
||||
help="Use accelerate launch for multi-GPU training",
|
||||
)
|
||||
@add_options_from_dataclass(EvaluateCliArgs)
|
||||
@add_options_from_config(AxolotlInputConfig)
|
||||
def evaluate(config: str, accelerate: bool, **kwargs):
|
||||
"""Evaluate a model."""
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
|
||||
# Enable expandable segments for cuda allocation to improve VRAM usage
|
||||
set_pytorch_cuda_alloc_conf()
|
||||
|
||||
if accelerate:
|
||||
base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.evaluate"]
|
||||
if config:
|
||||
base_cmd.append(config)
|
||||
cmd = build_command(base_cmd, kwargs)
|
||||
subprocess.run(cmd, check=True) # nosec B603
|
||||
else:
|
||||
from axolotl.cli.evaluate import do_cli
|
||||
|
||||
do_cli(config=config, **kwargs)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
||||
@click.option(
|
||||
@@ -211,6 +248,32 @@ def merge_lora(
|
||||
do_cli(config=config, **kwargs)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
||||
@add_options_from_dataclass(ConvertDiffTransformerCliArgs)
|
||||
@add_options_from_config(AxolotlInputConfig)
|
||||
def convert_diff_transformer(config: str, **kwargs):
|
||||
"""Convert model attention layers to differential attention layers."""
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
|
||||
from axolotl.cli.integrations.convert_diff_transformer import do_cli
|
||||
|
||||
do_cli(config=config, **kwargs)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("config", type=click.Path(exists=True, path_type=str))
|
||||
@add_options_from_dataclass(ConvertDiffTransformerCliArgs)
|
||||
@add_options_from_config(AxolotlInputConfig)
|
||||
def convert_rala(config: str, **kwargs):
|
||||
"""Convert model attention layers to RALA attention layers."""
|
||||
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
||||
|
||||
from axolotl.cli.integrations.convert_rala import do_cli
|
||||
|
||||
do_cli(config=config, **kwargs)
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
|
||||
@click.option("--dest", help="Destination directory")
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user