Compare commits

..

11 Commits

Author SHA1 Message Date
bursteratom
60c98a4353 stuff 2024-12-13 15:44:51 -05:00
bursteratom
c760d2b815 test accelerator 2024-12-12 12:29:35 -05:00
bursteratom
2014f58181 set os environ RANK 2024-12-11 11:45:07 -05:00
bursteratom
b5f9dd44f2 set os environ RANK 2024-12-11 11:40:20 -05:00
bursteratom
b17b1aada7 initialise process group for tp 2024-12-11 11:37:21 -05:00
bursteratom
85381b6b15 initialise process group for tp 2024-12-11 11:35:16 -05:00
bursteratom
acde081321 test lora tp 2024-12-11 11:19:34 -05:00
bursteratom
e4c68a0cbc test lora tp 2024-12-11 11:11:52 -05:00
bursteratom
3855f5c3d3 tp example tp auto 2024-12-11 11:03:39 -05:00
bursteratom
5dd566dc63 tp example 2024-12-11 11:01:23 -05:00
bursteratom
42389c1f78 enable tensor parallel 2024-12-11 10:38:14 -05:00
158 changed files with 940 additions and 4631 deletions

View File

@@ -44,11 +44,6 @@ jobs:
python-version: ${{ matrix.python_version }} python-version: ${{ matrix.python_version }}
cache: 'pip' # caching pip dependencies cache: 'pip' # caching pip dependencies
- name: upgrade pip
run: |
pip3 install --upgrade pip
pip3 install --upgrade packaging setuptools wheel
- name: Install PyTorch - name: Install PyTorch
run: | run: |
pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu pip3 install torch==${{ matrix.pytorch_version }} --index-url https://download.pytorch.org/whl/cpu

4
.gitignore vendored
View File

@@ -1,7 +1,6 @@
**/axolotl.egg-info **/axolotl.egg-info
configs configs
last_run_prepared/ last_run_prepared/
outputs
.vscode .vscode
_site/ _site/
@@ -186,6 +185,3 @@ out/
# vim # vim
*.swp *.swp
# symlinked to axolotl-artifacts in docker containers
outputs

View File

@@ -4,6 +4,7 @@ set -e
python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__" python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/ pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/ # pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/ pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/ pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/

View File

@@ -127,40 +127,34 @@ datasets:
# - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml. # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field. # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
chat_template: tokenizer_default chat_template: tokenizer_default
# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
chat_template_jinja: chat_template_jinja:
# The key in the data example that contains the messages. Default is "messages".
# Key containing the messages (default: "messages")
field_messages: messages field_messages: messages
# Key for role in each message (default: "role") # The key in the message turn that contains the role. Default is "role".
message_field_role: role message_field_role: role
# Key for content in each message (default: "content") # The key in the message turn that contains the content. Default is "content".
message_field_content: content message_field_content: content
# Optional[Dict[str, List]]. Roles mapping for the messages.
# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
roles: roles:
user: ["human", "user"] user: ["human", "user"]
assistant: ["gpt", "assistant"] assistant: ["gpt", "assistant", "ai"]
system: ["system"] system: ["system"]
tool: ["tool"]
# IMPORTANT: The following fields determine which parts of the conversation to train on. ## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
# See examples at `docs/dataset-formats/conversation.qmd`
# Note: If the below 4 fields are empty, defaults to training only on the last message.
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss. # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
roles_to_train: ["assistant"] # default roles_to_train: ["gpt", "assistant"]
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are: # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
# - all: train on all EOS tokens # - all: train on all EOS tokens
# - turn (default): train on the EOS token at the end of each trainable turn # - turn: train on the EOS token at the end of each trainable turn
# - last: train on the last EOS token in the conversation # - last: train on the last EOS token in the conversation
train_on_eos: last train_on_eos: last
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`. # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
message_field_training: training message_field_training: training
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn. # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train). # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
# See example at `docs/dataset-formats/conversation.qmd`
message_field_training_detail: train_detail message_field_training_detail: train_detail
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples. # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200 sample_packing_bin_size: 200
# Use batch flattening for speedups when not using sample_packing
batch_flattening:
# Passed through to transformers when loading the model when launched without accelerate # Passed through to transformers when loading the model when launched without accelerate
# Use `sequential` when training w/ model parallelism to limit memory # Use `sequential` when training w/ model parallelism to limit memory
device_map: device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
output_dir: ./completed-model output_dir: ./completed-model
# Whether to use torch.compile and which backend to use # Whether to use torch.compile and which backend to use
# setting to `auto` will enable torch compile when torch>=2.5.1 torch_compile: # bool
torch_compile: # Optional[Union[Literal["auto"], bool]]
torch_compile_backend: # Optional[str] torch_compile_backend: # Optional[str]
# Training hyperparameters # Training hyperparameters
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"] eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
# snapshots can be visualized @ https://pytorch.org/memory_viz
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training) loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3) loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

View File

@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
datasets: datasets:
- path: ... - path: ...
type: chat_template type: chat_template
roles_to_train:
train_on_eos:
``` ```
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages. 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
datasets: datasets:
- path: ... - path: ...
type: chat_template type: chat_template
roles_to_train: ["assistant"] # default value roles_to_train: ["assistant"]
``` ```
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages. 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
datasets: datasets:
- path: ... - path: ...
type: chat_template type: chat_template
roles_to_train: ["assistant"]
``` ```
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages. 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
datasets: datasets:
- path: ... - path: ...
type: chat_template type: chat_template
roles_to_train: ["assistant"]
``` ```
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation

View File

@@ -1,10 +1,6 @@
base_model: cerebras/btlm-3b-8k-base base_model: cerebras/btlm-3b-8k-base
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: GPT2Tokenizer tokenizer_type: GPT2Tokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
tokenizer_use_fast: true tokenizer_use_fast: true
tokenizer_legacy: true tokenizer_legacy: true

View File

@@ -1,7 +1,4 @@
base_model: cerebras/Cerebras-GPT-1.3B base_model: cerebras/Cerebras-GPT-1.3B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true
strict: false strict: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2 base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2 base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: true load_in_8bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2 base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: deepseek-ai/DeepSeek-V2-Lite base_model: deepseek-ai/DeepSeek-V2-Lite
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type trust_remote_code: true
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,15 +1,10 @@
# 1b: tiiuae/falcon-rw-1b # 1b: tiiuae/falcon-rw-1b
# 40b: tiiuae/falcon-40b # 40b: tiiuae/falcon-40b
base_model: tiiuae/falcon-7b base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false load_in_8bit: false
# enable 4bit for QLoRA # enable 4bit for QLoRA

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type trust_remote_code: true
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,10 +1,7 @@
# use google/gemma-7b if you have access # use google/gemma-7b if you have access
base_model: mhenrichsen/gemma-7b base_model: mhenrichsen/gemma-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-9b base_model: google/gemma-2-9b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-2b base_model: google/gemma-2-2b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForSequenceClassification model_type: AutoModelForSequenceClassification
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/gpt-j-6b base_model: EleutherAI/gpt-j-6b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true
strict: false strict: false

View File

@@ -1,7 +1,4 @@
base_model: ai21labs/Jamba-v0.1 base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: ai21labs/Jamba-v0.1 base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,8 +1,5 @@
base_model: ai21labs/AI21-Jamba-1.5-Large base_model: ai21labs/AI21-Jamba-1.5-Large
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true load_in_4bit: true
strict: false strict: false

View File

@@ -1,10 +1,6 @@
base_model: huggyllama/llama-7b base_model: huggyllama/llama-7b
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
datasets: datasets:
- path: openaccess-ai-collective/jeopardy - path: openaccess-ai-collective/jeopardy

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,13 +1,8 @@
base_model: TheBloke/Llama-2-7B-GPTQ base_model: TheBloke/Llama-2-7B-GPTQ
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
gptq: true gptq: true
gptq_disable_exllama: true gptq_disable_exllama: true
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
tokenizer_use_fast: true tokenizer_use_fast: true
tokenizer_legacy: true tokenizer_legacy: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: alpindale/Llama-3.2-11B-Vision-Instruct base_model: alpindale/Llama-3.2-11B-Vision-Instruct
# optionally might have model_type or tokenizer_type or processor_type
processor_type: AutoProcessor processor_type: AutoProcessor
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false strict: false
# these 3 lines are needed for now to handle vision chat templates w images # these 3 lines are needed for now to handle vision chat templates w images

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
plugins: plugins:
- axolotl.integrations.liger.LigerPlugin - axolotl.integrations.liger.LigerPlugin

View File

@@ -0,0 +1,58 @@
base_model: NousResearch/Meta-Llama-3.1-8B
load_in_8bit: false
load_in_4bit: false
strict: false
datasets:
- path: tatsu-lab/alpaca
type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:
gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2e-5
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
tensor_parallel: 'auto'
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true
warmup_steps: 100
evals_per_epoch: 2
eval_table_size:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
pad_token: <|end_of_text|>

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Meta-Llama-3-8B-Instruct base_model: meta-llama/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B-Instruct base_model: NousResearch/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -0,0 +1,73 @@
base_model: NousResearch/Meta-Llama-3.1-8B
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: true
load_in_4bit: false
strict: false
datasets:
- path: mhenrichsen/alpaca_2k_test
type: alpaca
dataset_prepared_path:
val_set_size: 0.05
output_dir: ./outputs/lora-out
sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
adapter: lora
lora_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_modules_to_save:
- embed_tokens
- lm_head
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false
tensor_parallel: 'auto'
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true
s2_attention:
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
pad_token: <|end_of_text|>

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: meta-llama/Llama-3.2-1B base_model: meta-llama/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,8 +1,5 @@
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true load_in_4bit: true
strict: false strict: false

View File

@@ -1,9 +1,6 @@
base_model: casperhansen/llama-3-70b-fp16 base_model: casperhansen/llama-3-70b-fp16
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,10 +1,7 @@
base_model: state-spaces/mamba-2.8b base_model: state-spaces/mamba-2.8b
# optionally might have model_type or tokenizer_type or tokenizer_config
model_type: MambaLMHeadModel model_type: MambaLMHeadModel
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
tokenizer_config: EleutherAI/gpt-neox-20b tokenizer_config: EleutherAI/gpt-neox-20b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1 base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1 base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1 base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1 base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -4,11 +4,8 @@
#face problems with the special tokens. #face problems with the special tokens.
base_model: mistralai/Mistral-7B-Instruct-v0.2 base_model: mistralai/Mistral-7B-Instruct-v0.2
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1 base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1 base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1 base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1 base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1 base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1 base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1 base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: mosaicml/mpt-7b base_model: mosaicml/mpt-7b
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
load_in_8bit: false load_in_8bit: false
datasets: datasets:

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2 base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false
strict: false strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2 base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false
strict: false strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2 base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true
strict: false strict: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3.5-mini-instruct base_model: microsoft/Phi-3.5-mini-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5 base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5 base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-2 base_model: microsoft/phi-2
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3-mini-4k-instruct base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,11 +1,7 @@
base_model: microsoft/Phi-3-mini-4k-instruct base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
trust_remote_code: true trust_remote_code: true
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
chat_template: phi_3 chat_template: phi_3
load_in_8bit: false load_in_8bit: false

View File

@@ -1,11 +1,7 @@
base_model: EleutherAI/pythia-12b-deduped base_model: EleutherAI/pythia-12b-deduped
base_model_ignore_patterns: pytorch* # prefer safetensors base_model_ignore_patterns: pytorch* # prefer safetensors
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false
gptq: false gptq: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/pythia-1.4b-deduped base_model: EleutherAI/pythia-1.4b-deduped
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
datasets: datasets:
- path: teknium/GPT4-LLM-Cleaned - path: teknium/GPT4-LLM-Cleaned

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: Qwen/Qwen2.5-0.5B base_model: Qwen/Qwen2.5-0.5B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false strict: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen2-7B base_model: Qwen/Qwen2-7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: trust_remote_code:
load_in_8bit: false load_in_8bit: false
datasets: datasets:

View File

@@ -1,7 +1,4 @@
base_model: replit/replit-code-v1-3b base_model: replit/replit-code-v1-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false
datasets: datasets:

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: false load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true trust_remote_code: true
load_in_8bit: true load_in_8bit: true

View File

@@ -1,6 +1,4 @@
base_model: bigcode/starcoder2-3b base_model: bigcode/starcoder2-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: TinyLlama/TinyLlama_v1.1 base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,8 +1,5 @@
base_model: TinyLlama/TinyLlama_v1.1 base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true load_in_8bit: true
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,7 @@
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: false load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: TinyLlama/TinyLlama_v1.1 base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -1,14 +1,9 @@
# An example finetuning Saleforce's XGen-7b model with 8k context using qlora # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
# on Tim Dettmer's Guanaco dataset. # on Tim Dettmer's Guanaco dataset.
base_model: Salesforce/xgen-7b-8k-base base_model: Salesforce/xgen-7b-8k-base
# optionally might have model_type or tokenizer_type trust_remote_code: true
model_type: AutoModelForCausalLM model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false load_in_8bit: false
# enable 4bit for QLoRA # enable 4bit for QLoRA
load_in_4bit: true load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: 01-ai/Yi-34B-Chat base_model: 01-ai/Yi-34B-Chat
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false load_in_8bit: false
load_in_4bit: true load_in_4bit: true

View File

@@ -7,31 +7,26 @@ mamba-ssm==1.2.0.post1
flash-attn==2.7.0.post2 flash-attn==2.7.0.post2
xformers>=0.0.23.post1 xformers>=0.0.23.post1
autoawq==0.2.7.post3 autoawq==0.2.7.post3
liger-kernel==0.5.2 liger-kernel==0.4.2
# END section # END section
packaging==23.2 packaging==23.2
peft==0.14.0 peft==0.14.0
transformers==4.47.1 transformers>=4.46.3
tokenizers>=0.20.1 tokenizers>=0.20.1
accelerate==1.2.1 accelerate==1.2.0
datasets==3.1.0 datasets==3.1.0
deepspeed==0.16.1 deepspeed==0.16.1
trl==0.12.1
optimum==1.16.2
hf_transfer
sentencepiece
gradio==3.50.2
pydantic==2.6.3 pydantic==2.6.3
addict addict
fire fire
PyYAML>=6.0 PyYAML>=6.0
requests requests
sentencepiece
wandb wandb
einops einops
optimum==1.16.2
hf_transfer
colorama colorama
numba numba
numpy>=1.24.4,<=2.0.1 numpy>=1.24.4,<=2.0.1
@@ -41,6 +36,7 @@ scipy
scikit-learn==1.4.2 scikit-learn==1.4.2
nvidia-ml-py==12.560.30 nvidia-ml-py==12.560.30
art art
gradio==3.50.2
tensorboard tensorboard
python-dotenv==1.0.1 python-dotenv==1.0.1
@@ -49,6 +45,7 @@ s3fs>=2024.5.0
gcsfs>=2024.5.0 gcsfs>=2024.5.0
# adlfs # adlfs
trl==0.12.1
zstandard==0.22.0 zstandard==0.22.0
fastcore fastcore
@@ -58,7 +55,5 @@ langdetect==1.0.9
immutabledict==4.2.0 immutabledict==4.2.0
antlr4-python3-runtime==4.13.2 antlr4-python3-runtime==4.13.2
torchao==0.7.0 torchao==0.5.0
schedulefree==1.3.0 schedulefree==1.3.0
axolotl-contribs-lgpl==0.0.1b2

View File

@@ -32,5 +32,5 @@ else:
raise RuntimeError(f"Torch = {v} too new!") raise RuntimeError(f"Torch = {v} too new!")
x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "") x = x.format(cuda.replace(".", ""), "-ampere" if is_ampere else "")
print( print(
f'pip install unsloth-zoo==2024.12.1 && pip install --no-deps "unsloth[{x}]==2024.12.4"' f'pip install unsloth-zoo==2024.11.7 && pip install --no-deps "unsloth[{x}]==2024.11.9"'
) )

View File

@@ -1,7 +1,3 @@
"""Axolotl - Train and fine-tune large language models""" """Axolotl - Train and fine-tune large language models"""
import pkgutil
__path__ = pkgutil.extend_path(__path__, __name__) # Make this a namespace package
__version__ = "0.6.0" __version__ = "0.6.0"

View File

@@ -1,52 +0,0 @@
"""
CLI to run training on a model
"""
import logging
from pathlib import Path
from typing import Dict, Union
import fire
from dotenv import load_dotenv
from transformers.hf_argparser import HfArgumentParser
from axolotl.cli import (
check_accelerate_default_config,
check_user_token,
load_cfg,
load_datasets,
load_rl_datasets,
print_axolotl_text_art,
)
from axolotl.common.cli import TrainerCliArgs
from axolotl.evaluate import evaluate
LOG = logging.getLogger("axolotl.cli.evaluate")
def do_evaluate(cfg, cli_args) -> Dict[str, float]:
# pylint: disable=duplicate-code
print_axolotl_text_art()
check_accelerate_default_config()
check_user_token()
if cfg.rl: # and cfg.rl != "orpo":
dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
else:
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
return evaluate(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
# pylint: disable=duplicate-code
parsed_cfg = load_cfg(config, **kwargs)
parser = HfArgumentParser(TrainerCliArgs)
parsed_cli_args, _ = parser.parse_args_into_dataclasses(
return_remaining_strings=True
)
do_evaluate(parsed_cfg, parsed_cli_args)
if __name__ == "__main__":
load_dotenv()
fire.Fire(do_cli)

View File

@@ -1,207 +0,0 @@
"""CLI to convert a transformers model's attns to diff attns."""
import logging
import warnings
from pathlib import Path
from time import time
from typing import Union
import fire
import torch
import yaml
from colorama import Fore
from dotenv import load_dotenv
from transformers import HfArgumentParser
from axolotl.cli import load_cfg, print_axolotl_text_art
from axolotl.common.cli import ConvertDiffTransformerCliArgs, load_model_and_tokenizer
from axolotl.integrations.diff_transformer.convert import convert_to_diff_attn
from axolotl.utils.yaml import dump_yaml_preserved_order
LOG = logging.getLogger(__name__)
def test_inference(model, tokenizer, prompt="The quick brown fox"):
"""Run test inference and return generation time"""
try:
inputs = tokenizer(prompt, return_tensors="pt")
inputs = {
k: v.to(device=model.device, dtype=torch.long) for k, v in inputs.items()
}
start = time()
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=20,
num_beams=1,
do_sample=False,
pad_token_id=tokenizer.pad_token_id,
use_cache=False,
)
elapsed = time() - start
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
LOG.info("Prompt: %s", prompt)
LOG.info("Generated: %s", generated_text)
LOG.info("Generation time: %.2fs", elapsed)
return elapsed, generated_text
except Exception as exc:
LOG.error("Inference failed: %s", str(exc))
raise
def convert_diff_transformer(cfg, cli_args, config_path):
debug_info = {}
# Load model and tokenizer
with warnings.catch_warnings():
warnings.simplefilter("ignore")
model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args)
model.to(cfg.device, dtype=cfg.torch_dtype)
# Log original model info
LOG.info(
"Original model config:\n\t- Hidden size: %d\n\t- Num attention heads: %d",
model.config.hidden_size,
model.config.num_attention_heads,
)
# Test original model
if cli_args.debug:
LOG.info("Testing original model...")
debug_info["orig_time"], debug_info["orig_text"] = test_inference(
model, tokenizer
)
# Convert attention
LOG.info("Converting to differential attention...")
if cli_args.split_heads and cli_args.zero_init:
LOG.warning(
Fore.YELLOW
+ "Warning: Using split_heads with zero_init is not recommended; "
+ "split_heads will preclude the effects of zero_init"
+ Fore.RESET
)
try:
model = convert_to_diff_attn(
model=model,
zero_init=cli_args.zero_init,
sublayer_norm=cli_args.sublayer_norm,
split_heads=cli_args.split_heads,
)
model.to(cfg.device, dtype=cfg.torch_dtype)
except Exception as exc:
LOG.error(Fore.RED + "Conversion failed: %s" + Fore.RESET, str(exc))
raise
# Test converted model
if cli_args.debug:
LOG.info("Testing converted model...")
debug_info["conv_time"], debug_info["conv_text"] = test_inference(
model, tokenizer
)
# Save if requested
if cfg.output_dir:
# Save model and tokenizer
LOG.info("Saving converted model to %s", cfg.output_dir)
model.save_pretrained(cfg.output_dir)
tokenizer.save_pretrained(cfg.output_dir)
# Modify config to reflect new path / differential attention
output_config_path = Path(cfg.output_dir) / "axolotl_config.yml"
LOG.info("Saving updated config to %s", output_config_path)
with open(config_path, "r", encoding="utf-8") as file:
modified_cfg = yaml.safe_load(file) or {}
modified_cfg["base_model"] = cfg.output_dir
modified_cfg["diff_attention"] = True
plugin_class = (
"axolotl.integrations.diff_transformer.DifferentialTransformerPlugin"
)
if "plugins" in modified_cfg:
modified_cfg["plugins"].append(plugin_class)
else:
modified_cfg["plugins"] = [plugin_class]
dump_yaml_preserved_order(
data=modified_cfg,
reference_yaml_path=config_path,
output_path=output_config_path,
)
else:
LOG.info("Not saving converted model to disk")
LOG.info("Pass --output-dir path/to/save to save model")
if cli_args.debug:
LOG.info(
Fore.GREEN
+ "Conversion successful!\n"
+ f"Original generation time: {debug_info['orig_time']:.2f}s\n"
+ f"Converted generation time: {debug_info['conv_time']:.2f}s"
+ Fore.RESET
)
if debug_info["orig_text"] == debug_info["conv_text"]:
LOG.info(
Fore.GREEN
+ "Generations match!\n"
+ "Model generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['orig_text']}\n"
+ "*" * 50
+ "\n"
+ Fore.RESET
)
debug_info["generations_match"] = True
else:
message = (
"Generations do not match.\n"
+ "Original generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['orig_text']}\n"
+ "*" * 50
+ "\n"
+ "Converted generation:\n"
+ "*" * 50
+ "\n"
+ f"{debug_info['conv_text']}\n"
+ "*" * 50
+ "\n"
)
debug_info["generations_match"] = False
if cli_args.zero_init and not cli_args.sublayer_norm:
LOG.info(Fore.RED + message + Fore.RESET)
debug_info["match_expected"] = True
else:
LOG.info(
Fore.YELLOW
+ message
+ "However, this is expected since --zero-init"
+ " and --no-sublayer-norm were not passed."
+ Fore.RESET
)
debug_info["match_expected"] = False
return model, debug_info
def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs):
print_axolotl_text_art()
cfg = load_cfg(config, **kwargs)
parser = HfArgumentParser(ConvertDiffTransformerCliArgs)
cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
convert_diff_transformer(cfg, cli_args, config)
if __name__ == "__main__":
load_dotenv()
fire.Fire(do_cli)

Some files were not shown because too many files have changed in this diff Show More