Compare commits

..

1 Commits

Author SHA1 Message Date
Wing Lian
79612da5c8 perform flakey patched tests in individual runner 2024-12-12 23:22:28 -05:00
226 changed files with 3787 additions and 6793 deletions

View File

@@ -1,7 +1,6 @@
name: lint
on:
# check on PRs, and manual triggers
merge_group:
pull_request:
paths:
- '**.py'

View File

@@ -25,6 +25,7 @@ jobs:
python_version: "3.11"
pytorch: 2.3.1
axolotl_extras: mamba-ssm
is_latest: true
- cuda: 124
cuda_version: 12.4.1
python_version: "3.11"
@@ -35,7 +36,6 @@ jobs:
python_version: "3.11"
pytorch: 2.5.1
axolotl_extras:
is_latest: true
runs-on: axolotl-gpu-runner
steps:
- name: Checkout
@@ -92,6 +92,7 @@ jobs:
python_version: "3.11"
pytorch: 2.3.1
axolotl_extras:
is_latest: true
- cuda: 124
cuda_version: 12.4.1
python_version: "3.11"
@@ -102,7 +103,6 @@ jobs:
python_version: "3.11"
pytorch: 2.5.1
axolotl_extras:
is_latest: true
runs-on: axolotl-gpu-runner
steps:
- name: Checkout

View File

@@ -52,7 +52,7 @@ jobs:
- name: Install Modal
run: |
python -m pip install --upgrade pip
pip install modal==0.71.8 jinja2
pip install modal==0.63.64 jinja2
- name: Update env vars
run: |
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV

View File

@@ -129,7 +129,7 @@ jobs:
- name: Install Modal
run: |
python -m pip install --upgrade pip
pip install modal==0.71.8 jinja2
pip install modal==0.63.64 jinja2
- name: Update env vars
run: |
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV

View File

@@ -1,7 +1,6 @@
name: Tests
on:
# check on push/merge to main, PRs, and manual triggers
merge_group:
push:
branches:
- "main"
@@ -61,15 +60,6 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v4
- name: Restore HF cache
id: hf-cache-restore
uses: actions/cache/restore@v4
with:
path: |
/home/runner/.cache/huggingface/hub/datasets--*
/home/runner/.cache/huggingface/hub/models--*
key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
- name: Setup Python
uses: actions/setup-python@v5
with:
@@ -110,15 +100,6 @@ jobs:
run: |
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
- name: Save HF cache
id: hf-cache
uses: actions/cache/save@v4
with:
path: |
/home/runner/.cache/huggingface/hub/datasets--*
/home/runner/.cache/huggingface/hub/models--*
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
pytest-sdist:
name: PyTest from Source Dist
runs-on: ubuntu-latest
@@ -134,15 +115,6 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v4
- name: Restore HF cache
id: hf-cache-restore
uses: actions/cache/restore@v4
with:
path: |
/home/runner/.cache/huggingface/hub/datasets--*
/home/runner/.cache/huggingface/hub/models--*
key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
- name: Setup Python
uses: actions/setup-python@v5
with:
@@ -184,15 +156,6 @@ jobs:
run: |
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
- name: Save HF cache
id: hf-cache
uses: actions/cache/save@v4
with:
path: |
/home/runner/.cache/huggingface/hub/datasets--*
/home/runner/.cache/huggingface/hub/models--*
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
docker-e2e-tests-1st:
if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
# this job needs to be run on self-hosted GPU runners...
@@ -207,7 +170,7 @@ jobs:
- cuda: 124
cuda_version: 12.4.1
python_version: "3.11"
pytorch: 2.5.1
pytorch: 2.4.1
num_gpus: 1
axolotl_extras:
steps:
@@ -220,7 +183,7 @@ jobs:
- name: Install Modal
run: |
python -m pip install --upgrade pip
pip install modal==0.71.8 jinja2
pip install modal==0.63.64 jinja2
- name: Update env vars
run: |
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
@@ -253,7 +216,7 @@ jobs:
- cuda: 124
cuda_version: 12.4.1
python_version: "3.11"
pytorch: 2.4.1
pytorch: 2.5.1
num_gpus: 1
axolotl_extras:
steps:
@@ -266,7 +229,7 @@ jobs:
- name: Install Modal
run: |
python -m pip install --upgrade pip
pip install modal==0.71.8 jinja2
pip install modal==0.63.64 jinja2
- name: Update env vars
run: |
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV

1
.gitignore vendored
View File

@@ -1,7 +1,6 @@
**/axolotl.egg-info
configs
last_run_prepared/
outputs
.vscode
_site/

View File

@@ -23,7 +23,7 @@ repos:
hooks:
- id: flake8
- repo: https://github.com/PyCQA/pylint
rev: v3.3.0
rev: v2.17.4
hooks:
- id: pylint
- repo: https://github.com/pre-commit/mirrors-mypy

View File

@@ -1,5 +1,5 @@
[MASTER]
init-hook="from pylint.config import find_default_config_files; import sys; sys.path.append(next(find_default_config_files()).parent.as_posix())"
init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))"
[TYPECHECK]
@@ -12,4 +12,3 @@ generated-members=numpy.*, torch.*
disable=missing-function-docstring, line-too-long, import-error,
too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
too-many-positional-arguments, possibly-used-before-assignment

View File

@@ -8,7 +8,6 @@ ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
ENV GITHUB_REF="{{ GITHUB_REF }}"
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
ENV HF_HOME="{{ HF_HOME }}"
RUN apt-get update && \
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev

View File

@@ -5,6 +5,11 @@ python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
# Run every test under tests/e2e/each in its own pytest process so that state
# leaked by one test (patches, globals, GPU memory) cannot affect the next.
#
# `pytest --collect-only -q` prints one node ID per line, followed by a blank
# line and a summary such as "12 tests collected in 0.34s". Only real node IDs
# contain "::", so filter on that; otherwise the loop would also try to run
# `pytest 12`, `pytest tests`, `pytest collected`, ... and fail on each.
tests=$(pytest --collect-only -q tests/e2e/each | grep '::')
for t in $tests; do
  # Quote the node ID so parametrized IDs like test_x[a-b] are not glob-expanded.
  pytest "$t"
done

View File

@@ -28,7 +28,6 @@ df_args = {
"CUDA": os.environ.get("CUDA", "121"),
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
"HF_HOME": "/workspace/data/huggingface-cache/hub",
}
dockerfile_contents = df_template.render(**df_args)
@@ -49,12 +48,6 @@ cicd_image = (
app = App("Axolotl CI/CD", secrets=[])
hf_cache_volume = modal.Volume.from_name(
"axolotl-ci-hf-hub-cache", create_if_missing=True
)
VOLUME_CONFIG = {
"/workspace/data/huggingface-cache/hub": hf_cache_volume,
}
N_GPUS = int(os.environ.get("N_GPUS", 2))
GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
@@ -74,7 +67,6 @@ def run_cmd(cmd: str, run_folder: str):
timeout=60 * 60,
cpu=8.0,
memory=131072 * N_GPUS,
volumes=VOLUME_CONFIG,
)
def cicd_pytest():
run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")

View File

@@ -29,7 +29,6 @@ df_args = {
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
"HF_HOME": "/workspace/data/huggingface-cache/hub",
}
dockerfile_contents = df_template.render(**df_args)
@@ -51,15 +50,9 @@ cicd_image = (
app = App("Axolotl CI/CD", secrets=[])
hf_cache_volume = modal.Volume.from_name(
"axolotl-ci-hf-hub-cache", create_if_missing=True
)
VOLUME_CONFIG = {
"/workspace/data/huggingface-cache/hub": hf_cache_volume,
}
N_GPUS = int(os.environ.get("N_GPUS", 1))
GPU_CONFIG = modal.gpu.L40S(count=N_GPUS)
GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)
def run_cmd(cmd: str, run_folder: str):
@@ -76,7 +69,6 @@ def run_cmd(cmd: str, run_folder: str):
timeout=60 * 60,
cpu=8.0,
memory=131072,
volumes=VOLUME_CONFIG,
)
def cicd_pytest():
run_cmd("./cicd/cicd.sh", "/workspace/axolotl")

View File

@@ -1,27 +0,0 @@
{
"zero_optimization": {
"stage": 1,
"overlap_comm": true
},
"bf16": {
"enabled": "auto"
},
"fp16": {
"enabled": "auto",
"auto_cast": false,
"loss_scale": 0,
"initial_scale_power": 32,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"compile": {
"disable": false,
"backend": "inductor"
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}

View File

@@ -127,40 +127,34 @@ datasets:
# - tokenizer_default_fallback_*: where * is the name of the chat template to fall back to if the tokenizer does not have a chat template; otherwise the tokenizer's chat template is used. E.g. tokenizer_default_fallback_chatml.
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
chat_template: tokenizer_default
# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
# Custom jinja template for the chat template. This is only used if `chat_template` is set to `jinja` or left empty (in which case `chat_template` is automatically set to `jinja`).
chat_template_jinja:
# Key containing the messages (default: "messages")
# The key in the data example that contains the messages. Default is "messages".
field_messages: messages
# Key for role in each message (default: "role")
# The key in the message turn that contains the role. Default is "role".
message_field_role: role
# Key for content in each message (default: "content")
# The key in the message turn that contains the content. Default is "content".
message_field_content: content
# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
# Optional[Dict[str, List]]. Roles mapping for the messages.
roles:
user: ["human", "user"]
assistant: ["gpt", "assistant"]
assistant: ["gpt", "assistant", "ai"]
system: ["system"]
tool: ["tool"]
# IMPORTANT: The following fields determine which parts of the conversation to train on.
# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
# See examples at `docs/dataset-formats/conversation.qmd`
# Note: If the below 4 fields are empty, defaults to training only on the last message.
## NOTE: Leaving the fields below empty defaults to the simple legacy tokenization strategy, where only the last message is trained on.
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
roles_to_train: ["assistant"] # default
roles_to_train: ["gpt", "assistant"]
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
# - all: train on all EOS tokens
# - turn (default): train on the EOS token at the end of each trainable turn
# - turn: train on the EOS token at the end of each trainable turn
# - last: train on the last EOS token in the conversation
train_on_eos: last
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
message_field_training: training
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
# See example at `docs/dataset-formats/conversation.qmd`
message_field_training_detail: train_detail
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200
# Use batch flattening for speedups when not using sample_packing
batch_flattening:
# Passed through to transformers when loading the model when launched without accelerate
# Use `sequential` when training w/ model parallelism to limit memory
device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
output_dir: ./completed-model
# Whether to use torch.compile and which backend to use
# setting to `auto` will enable torch compile when torch>=2.5.1
torch_compile: # Optional[Union[Literal["auto"], bool]]
torch_compile: # bool
torch_compile_backend: # Optional[str]
# Training hyperparameters
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
# snapshots can be visualized @ https://pytorch.org/memory_viz
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)

View File

@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
datasets:
- path: ...
type: chat_template
roles_to_train:
train_on_eos:
```
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"] # default value
roles_to_train: ["assistant"]
```
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"]
```
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
datasets:
- path: ...
type: chat_template
roles_to_train: ["assistant"]
```
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation

View File

@@ -19,14 +19,7 @@ For pretraining, there is no prompt template or roles. The only required field
Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:
```{.yaml filename="config.yaml"}
pretraining_dataset:
- name:
path:
split:
text_column: # column in dataset with the data, usually `text`
type: pretrain
trust_remote_code:
skip: # number of rows of data to skip over from the beginning
pretraining_dataset: # hf path only
...
```

View File

@@ -29,7 +29,7 @@ datasets:
type: chatml.intel
- path: argilla/ultrafeedback-binarized-preferences
split: train
type: chatml
type: chatml.argilla
```
#### IPO

View File

@@ -1,10 +1,6 @@
base_model: cerebras/btlm-3b-8k-base
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: GPT2Tokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
tokenizer_use_fast: true
tokenizer_legacy: true

View File

@@ -1,7 +1,4 @@
base_model: cerebras/Cerebras-GPT-1.3B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-13b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-34b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: codellama/CodeLlama-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: true

View File

@@ -1,7 +1,4 @@
base_model: LnL-AI/dbrx-base-converted-v2
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: deepseek-ai/DeepSeek-V2-Lite
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,15 +1,10 @@
# 1b: tiiuae/falcon-rw-1b
# 40b: tiiuae/falcon-40b
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
# enable 4bit for QLoRA

View File

@@ -1,12 +1,7 @@
base_model: tiiuae/falcon-7b
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,10 +1,7 @@
# use google/gemma-7b if you have access
base_model: mhenrichsen/gemma-7b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-9b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: google/gemma-2-2b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForSequenceClassification
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/gpt-j-6b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,7 +1,4 @@
base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: ai21labs/Jamba-v0.1
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,8 +1,5 @@
base_model: ai21labs/AI21-Jamba-1.5-Large
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true
strict: false

View File

@@ -1,10 +1,6 @@
base_model: huggyllama/llama-7b
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
datasets:
- path: openaccess-ai-collective/jeopardy

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,13 +1,8 @@
base_model: TheBloke/Llama-2-7B-GPTQ
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
gptq: true
gptq_disable_exllama: true
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
tokenizer_use_fast: true
tokenizer_legacy: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Llama-2-7b-hf
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
# optionally might have model_type or tokenizer_type or processor_type
processor_type: AutoProcessor
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false
# these 3 lines are needed for now to handle vision chat templates w images

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
plugins:
- axolotl.integrations.liger.LigerPlugin

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Meta-Llama-3.1-8B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B-Instruct
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: meta-llama/Llama-3.2-1B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,6 +1,4 @@
base_model: meta-llama/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,6 +1,4 @@
base_model: NousResearch/Llama-3.2-1B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,8 +1,5 @@
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_4bit: true
strict: false

View File

@@ -1,9 +1,6 @@
base_model: casperhansen/llama-3-70b-fp16
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: NousResearch/Meta-Llama-3-8B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,7 @@
base_model: state-spaces/mamba-2.8b
# optionally might have model_type or tokenizer_type or tokenizer_config
model_type: MambaLMHeadModel
tokenizer_type: AutoTokenizer
tokenizer_config: EleutherAI/gpt-neox-20b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -4,11 +4,8 @@
#face problems with the special tokens.
base_model: mistralai/Mistral-7B-Instruct-v0.2
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistralai/Mixtral-8x7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: mistral-community/Mixtral-8x22B-v0.1
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,9 +1,6 @@
base_model: mistralai/Mistral-7B-v0.1
# optionally might have model_type or tokenizer_type
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,5 @@
base_model: mosaicml/mpt-7b
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
load_in_8bit: false
datasets:

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false
strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false
strict: false

View File

@@ -1,10 +1,6 @@
base_model: openlm-research/open_llama_3b_v2
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true
strict: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3.5-mini-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-1_5
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: microsoft/phi-2
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,9 +1,6 @@
base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false

View File

@@ -1,11 +1,7 @@
base_model: microsoft/Phi-3-mini-4k-instruct
# optionally might have model_type or tokenizer_type
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
chat_template: phi_3
load_in_8bit: false

View File

@@ -1,11 +1,7 @@
base_model: EleutherAI/pythia-12b-deduped
base_model_ignore_patterns: pytorch* # prefer safetensors
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: false
gptq: false

View File

@@ -1,7 +1,4 @@
base_model: EleutherAI/pythia-1.4b-deduped
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
datasets:
- path: teknium/GPT4-LLM-Cleaned

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true

View File

@@ -1,9 +1,6 @@
base_model: Qwen/Qwen-7B
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen1.5-MoE-A2.7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,6 +1,4 @@
base_model: Qwen/Qwen2.5-0.5B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
strict: false

View File

@@ -1,7 +1,4 @@
base_model: Qwen/Qwen2-7B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
# optionally might have model_type or tokenizer_type
model_type: GPTNeoXForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code:
load_in_8bit: false
datasets:

View File

@@ -1,7 +1,4 @@
base_model: replit/replit-code-v1-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false
datasets:

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: false

View File

@@ -1,10 +1,6 @@
base_model: stabilityai/stablelm-2-1_6b
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
trust_remote_code: true
load_in_8bit: true

View File

@@ -1,6 +1,4 @@
base_model: bigcode/starcoder2-3b
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: false
load_in_4bit: true

View File

@@ -1,9 +1,6 @@
base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

View File

@@ -1,8 +1,5 @@
base_model: TinyLlama/TinyLlama_v1.1
# optionally might have model_type or tokenizer_type
tokenizer_type: AutoTokenizer
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name
load_in_8bit: true
load_in_4bit: false

Some files were not shown because too many files have changed in this diff Show More