Compare commits

..

1 Commits

Author SHA1 Message Date
NanoCode012
8428b3f2c7 feat: add dpo liger 2024-12-16 22:19:27 +07:00
132 changed files with 1352 additions and 2950 deletions

1
.gitignore vendored
View File

@@ -1,7 +1,6 @@
**/axolotl.egg-info
configs
last_run_prepared/
outputs
.vscode
_site/

View File

@@ -217,7 +217,7 @@ If you love axolotl, consider sponsoring the project by reaching out directly to
---
- [Modal](https://www.modal.com?utm_source=github&utm_medium=github&utm_campaign=axolotl) Modal lets you run data/AI jobs in the cloud, by just writing a few lines of Python. Customers use Modal to deploy Gen AI models at large scale, fine-tune LLM models, run protein folding simulations, and much more.
- [Modal](https://modal.com/) Modal lets you run data/AI jobs in the cloud, by just writing a few lines of Python. Customers use Modal to deploy Gen AI models at large scale, fine-tune LLM models, run protein folding simulations, and much more.
---

View File

@@ -5,6 +5,6 @@ python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/

View File

@@ -1,27 +0,0 @@
{
"zero_optimization": {
"stage": 1,
"overlap_comm": true
},
"bf16": {
"enabled": "auto"
},
"fp16": {
"enabled": "auto",
"auto_cast": false,
"loss_scale": 0,
"initial_scale_power": 32,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"compile": {
"disable": false,
"backend": "inductor"
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}

View File

@@ -127,40 +127,34 @@ datasets:
# - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
chat_template: tokenizer_default
# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
chat_template_jinja:
# Key containing the messages (default: "messages")
# The key in the data example that contains the messages. Default is "messages".
field_messages: messages
# Key for role in each message (default: "role")
# The key in the message turn that contains the role. Default is "role".
message_field_role: role
# Key for content in each message (default: "content")
# The key in the message turn that contains the content. Default is "content".
message_field_content: content
# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
# Optional[Dict[str, List]]. Roles mapping for the messages.
roles:
user: ["human", "user"]
assistant: ["gpt", "assistant"]
assistant: ["gpt", "assistant", "ai"]
system: ["system"]
tool: ["tool"]
# IMPORTANT: The following fields determine which parts of the conversation to train on.
# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
# See examples at `docs/dataset-formats/conversation.qmd`
# Note: If the below 4 fields are empty, defaults to training only on the last message.
## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
roles_to_train: ["assistant"] # default
roles_to_train: ["gpt", "assistant"]
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
# - all: train on all EOS tokens
# - turn (default): train on the EOS token at the end of each trainable turn
# - turn: train on the EOS token at the end of each trainable turn
# - last: train on the last EOS token in the conversation
train_on_eos: last
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
message_field_training: training
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
# See example at `docs/dataset-formats/conversation.qmd`
message_field_training_detail: train_detail
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200
# Use batch flattening for speedups when not using sample_packing
batch_flattening:
# Passed through to transformers when loading the model when launched without accelerate
# Use `sequential` when training w/ model parallelism to limit memory
device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
output_dir: ./completed-model
# Whether to use torch.compile and which backend to use
# setting to `auto` will enable torch compile when torch>=2.5.1
torch_compile: # Optional[Union[Literal["auto"], bool]]
torch_compile: # bool
torch_compile_backend: # Optional[str]
# Training hyperparameters
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
# snapshots can be visualized @ https://pytorch.org/memory_viz
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

View File

@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
datasets:
- path: ...
type: chat_template
roles_to_train:
train_on_eos:
```
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"] # default value
roles_to_train: ["assistant"]
```
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"]
```
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"]
```
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation

View File

@@ -1,10 +1,6 @@
base_model: cerebras/btlm-3b-8k-base
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: GPT2Tokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
tokenizer_use_fast: true
tokenizer_legacy: true

View File

@@ -1,7 +1,4 @@
base_model: cerebras/Cerebras-GPT-1.3B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,15 +0,0 @@
volumes:
- name: axolotl-data
mount: /workspace/data
- name: axolotl-artifacts
mount: /workspace/artifacts
secrets:
- HF_TOKEN
- WANDB_API_KEY
branch: cli-cloud-modal
gpu: h100
gpu_count: 1
memory: 128
timeout: 86400
timeout_preprocess: 14400
memory_preprocess: 32

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: deepseek-ai/DeepSeek-V2-Lite
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,15 +1,10 @@
# 1b: tiiuae/falcon-rw-1b
# 40b: tiiuae/falcon-40b
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
# enable 4bit for QLoRA

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,10 +1,7 @@
# use google/gemma-7b if you have access
base_model: mhenrichsen/gemma-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-9b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-2b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForSequenceClassification
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/gpt-j-6b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,7 +1,4 @@
base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,8 +1,5 @@
base_model: ai21labs/AI21-Jamba-1.5-Large
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true
strict: false

View File

@@ -1,10 +1,6 @@
base_model: huggyllama/llama-7b
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
datasets:
- path: openaccess-ai-collective/jeopardy

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,13 +1,8 @@
base_model: TheBloke/Llama-2-7B-GPTQ
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
gptq: true
gptq_disable_exllama: true
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
tokenizer_use_fast: true
tokenizer_legacy: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
# optionally might have model_type or tokenizer_type or processor_type
processor_type: AutoProcessor
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false
# these 3 lines are needed for now to handle vision chat templates w images

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
plugins:
- axolotl.integrations.liger.LigerPlugin

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: meta-llama/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,8 +1,5 @@
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true
strict: false

View File

@@ -1,9 +1,6 @@
base_model: casperhansen/llama-3-70b-fp16
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,7 @@
base_model: state-spaces/mamba-2.8b
# optionally might have model_type or tokenizer_type or tokenizer_config
model_type: MambaLMHeadModel
tokenizer_type: AutoTokenizer
tokenizer_config: EleutherAI/gpt-neox-20b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -4,11 +4,8 @@
#face problems with the special tokens.
base_model: mistralai/Mistral-7B-Instruct-v0.2
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: mosaicml/mpt-7b
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
load_in_8bit: false
datasets:

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false
strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false
strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3.5-mini-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-2
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,11 +1,7 @@
base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
chat_template: phi_3
load_in_8bit: false

View File

@@ -1,11 +1,7 @@
base_model: EleutherAI/pythia-12b-deduped
base_model_ignore_patterns: pytorch* # prefer safetensors
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false
gptq: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/pythia-1.4b-deduped
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
datasets:
- path: teknium/GPT4-LLM-Cleaned

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: Qwen/Qwen2.5-0.5B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen2-7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code:
load_in_8bit: false
datasets:

View File

@@ -1,7 +1,4 @@
base_model: replit/replit-code-v1-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false
datasets:

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: true

View File

@@ -1,6 +1,4 @@
base_model: bigcode/starcoder2-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,8 +1,5 @@
base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,7 @@
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,14 +1,9 @@
# An example finetuning Saleforce's XGen-7b model with 8k context using qlora
# on Tim Dettmer's Guanaco dataset.
base_model: Salesforce/xgen-7b-8k-base
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false
# enable 4bit for QLoRA
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: 01-ai/Yi-34B-Chat
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,11 +0,0 @@
lm_eval_model: axolotl-ai-co/numina-8b-ep1-exp1
lm_eval_tasks:
- leaderboard_math_hard
lm_eval_batch_size: 64
apply_chat_template: false
wandb_project: numina-kd-experiment
wandb_entity: axolotl-ai
bf16: true
flash_attention: true
output_dir: ./outputs/model-evals-out

View File

@@ -7,32 +7,26 @@ mamba-ssm==1.2.0.post1
flash-attn==2.7.0.post2
xformers>=0.0.23.post1
autoawq==0.2.7.post3
liger-kernel==0.5.2
liger-kernel==0.4.2
# END section
packaging==23.2
peft==0.14.0
transformers==4.47.1
transformers>=4.46.3
tokenizers>=0.20.1
accelerate==1.2.1
accelerate==1.2.0
datasets==3.1.0
deepspeed==0.16.1
trl==0.12.1
optimum==1.16.2
hf_transfer
sentencepiece
gradio==3.50.2
modal==0.70.5
pydantic==2.6.3
addict
fire
PyYAML>=6.0
requests
sentencepiece
wandb
einops
optimum==1.16.2
hf_transfer
colorama
numba
numpy>=1.24.4,<=2.0.1
@@ -42,6 +36,7 @@ scipy
scikit-learn==1.4.2
nvidia-ml-py==12.560.30
art
gradio==3.50.2
tensorboard
python-dotenv==1.0.1
@@ -50,16 +45,15 @@ s3fs>=2024.5.0
gcsfs>=2024.5.0
# adlfs
trl==0.12.1
zstandard==0.22.0
fastcore
# lm eval harness
lm_eval==0.4.7
lm_eval==0.4.4
langdetect==1.0.9
immutabledict==4.2.0
antlr4-python3-runtime==4.13.2
torchao==0.7.0
torchao==0.5.0
schedulefree==1.3.0
axolotl-contribs-lgpl==0.0.2

View File

@@ -1,15 +1,10 @@
#@@ #@@ @@# @@#
@@ @@ @@ @@ =@@# @@ #@ =@@#.
@@ #@@@@@@@@@ @@ #@#@= @@ #@ .=@@
#@@@@@@@@@@@@@@@@@ =@# @# ##= ## =####=+ @@ =#####+ =#@@###. @@
@@@@@@@@@@/ +@@/ +@@ #@ =@= #@= @@ =@#+ +#@# @@ =@#+ +#@# #@. @@
@@@@@@@@@@ ##@@ ##@@ =@# @# =@# @# @@ @@ @@ @@ #@ #@ @@
@@@@@@@@@@@@@@@@@@@@ #@=+++#@= =@@# @@ @@ @@ @@ #@ #@ @@
=@#=====@@ =@# @# @@ @@ @@ @@ #@ #@ @@
@@@@@@@@@@@@@@@@ @@@@ #@ #@= #@= +@@ #@# =@# @@. =@# =@# #@. @@
=@# @# #@= #@ =#@@@@#= +#@@= +#@@@@#= .##@@+ @@
@@@@ @@@@@@@@@@@@@@@@
dP dP dP
88 88 88
.d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88
88' `88 `8bd8' 88' `88 88 88' `88 88 88
88. .88 .d88b. 88. .88 88 88. .88 88 88
`88888P8 dP' `dP `88888P' dP `88888P' dP dP
Welcome to the axolotl cloud image! If the you've mounted a disk to /workspace and the axolotl directory ie empty, run the following commands:

View File

@@ -32,5 +32,5 @@ else:
raise RuntimeError(f"Torch = {v} too new!")
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
print(
f'pip install unsloth-zoo==2024.12.1 && pip install --no-deps "unsloth[{x}]==2024.12.4"'
f'pip install unsloth-zoo==2024.11.7 && pip install --no-deps "unsloth[{x}]==2024.11.9"'
)

View File

@@ -1,7 +1,3 @@
"""Axolotl - Train and fine-tune large language models"""
import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
__version__ = "0.6.0"

View File

@@ -1,56 +0,0 @@
"""
launch axolotl in supported cloud platforms
"""
from pathlib import Path
from typing import Union
import yaml
from axolotl.cli import print_axolotl_text_art
from axolotl.cli.cloud.modal_ import ModalCloud
from axolotl.utils.dict import DictDefault
def load_cloud_cfg(cloud_config: Union[Path, str]) -> DictDefault:
"""Load and validate cloud configuration."""
# Load cloud configuration.
with open(cloud_config, encoding="utf-8") as file:
cloud_cfg: DictDefault = DictDefault(yaml.safe_load(file))
return cloud_cfg
def do_cli_preprocess(
cloud_config: Union[Path, str],
config: Union[Path, str] = Path("examples/"),
) -> None:
print_axolotl_text_art()
cloud_cfg = load_cloud_cfg(cloud_config)
cloud = ModalCloud(cloud_cfg)
with open(config, "r", encoding="utf-8") as file:
config_yaml = file.read()
cloud.preprocess(config_yaml)
def do_cli_train(
cloud_config: Union[Path, str],
config: Union[Path, str] = Path("examples/"),
accelerate: bool = True,
) -> None:
print_axolotl_text_art()
cloud_cfg = load_cloud_cfg(cloud_config)
cloud = ModalCloud(cloud_cfg)
with open(config, "r", encoding="utf-8") as file:
config_yaml = file.read()
cloud.train(config_yaml, accelerate=accelerate)
def do_cli_lm_eval(
cloud_config: Union[Path, str],
config: Union[Path, str] = Path("examples/"),
) -> None:
print_axolotl_text_art()
cloud_cfg = load_cloud_cfg(cloud_config)
cloud = ModalCloud(cloud_cfg)
with open(config, "r", encoding="utf-8") as file:
config_yaml = file.read()
cloud.lm_eval(config_yaml)

Some files were not shown because too many files have changed in this diff Show More