Compare commits
1 Commits
cli-refact
...
base-model
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d657ff9c94 |
1
.github/workflows/lint.yml
vendored
1
.github/workflows/lint.yml
vendored
@@ -1,7 +1,6 @@
|
|||||||
name: lint
|
name: lint
|
||||||
on:
|
on:
|
||||||
# check on PRs, and manual triggers
|
# check on PRs, and manual triggers
|
||||||
merge_group:
|
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- '**.py'
|
- '**.py'
|
||||||
|
|||||||
4
.github/workflows/main.yml
vendored
4
.github/workflows/main.yml
vendored
@@ -25,6 +25,7 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.3.1
|
pytorch: 2.3.1
|
||||||
axolotl_extras: mamba-ssm
|
axolotl_extras: mamba-ssm
|
||||||
|
is_latest: true
|
||||||
- cuda: 124
|
- cuda: 124
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.4.1
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
@@ -35,7 +36,6 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.5.1
|
pytorch: 2.5.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
is_latest: true
|
|
||||||
runs-on: axolotl-gpu-runner
|
runs-on: axolotl-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -92,6 +92,7 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.3.1
|
pytorch: 2.3.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
|
is_latest: true
|
||||||
- cuda: 124
|
- cuda: 124
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.4.1
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
@@ -102,7 +103,6 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.5.1
|
pytorch: 2.5.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
is_latest: true
|
|
||||||
runs-on: axolotl-gpu-runner
|
runs-on: axolotl-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
|
|||||||
2
.github/workflows/multi-gpu-e2e.yml
vendored
2
.github/workflows/multi-gpu-e2e.yml
vendored
@@ -52,7 +52,7 @@ jobs:
|
|||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==0.71.8 jinja2
|
pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
|
|||||||
2
.github/workflows/tests-nightly.yml
vendored
2
.github/workflows/tests-nightly.yml
vendored
@@ -129,7 +129,7 @@ jobs:
|
|||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==0.71.8 jinja2
|
pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
|
|||||||
41
.github/workflows/tests.yml
vendored
41
.github/workflows/tests.yml
vendored
@@ -1,7 +1,6 @@
|
|||||||
name: Tests
|
name: Tests
|
||||||
on:
|
on:
|
||||||
# check on push/merge to main, PRs, and manual triggers
|
# check on push/merge to main, PRs, and manual triggers
|
||||||
merge_group:
|
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- "main"
|
- "main"
|
||||||
@@ -61,15 +60,6 @@ jobs:
|
|||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Restore HF cache
|
|
||||||
id: hf-cache-restore
|
|
||||||
uses: actions/cache/restore@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
|
|
||||||
|
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -110,15 +100,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
||||||
|
|
||||||
- name: Save HF cache
|
|
||||||
id: hf-cache
|
|
||||||
uses: actions/cache/save@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
|
|
||||||
|
|
||||||
pytest-sdist:
|
pytest-sdist:
|
||||||
name: PyTest from Source Dist
|
name: PyTest from Source Dist
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -134,15 +115,6 @@ jobs:
|
|||||||
- name: Check out repository code
|
- name: Check out repository code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Restore HF cache
|
|
||||||
id: hf-cache-restore
|
|
||||||
uses: actions/cache/restore@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
|
|
||||||
|
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -184,15 +156,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
||||||
|
|
||||||
- name: Save HF cache
|
|
||||||
id: hf-cache
|
|
||||||
uses: actions/cache/save@v4
|
|
||||||
with:
|
|
||||||
path: |
|
|
||||||
/home/runner/.cache/huggingface/hub/datasets--*
|
|
||||||
/home/runner/.cache/huggingface/hub/models--*
|
|
||||||
key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
|
|
||||||
|
|
||||||
docker-e2e-tests-1st:
|
docker-e2e-tests-1st:
|
||||||
if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
||||||
# this job needs to be run on self-hosted GPU runners...
|
# this job needs to be run on self-hosted GPU runners...
|
||||||
@@ -220,7 +183,7 @@ jobs:
|
|||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==0.71.8 jinja2
|
pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
@@ -266,7 +229,7 @@ jobs:
|
|||||||
- name: Install Modal
|
- name: Install Modal
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install modal==0.71.8 jinja2
|
pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,7 +1,6 @@
|
|||||||
**/axolotl.egg-info
|
**/axolotl.egg-info
|
||||||
configs
|
configs
|
||||||
last_run_prepared/
|
last_run_prepared/
|
||||||
outputs
|
|
||||||
.vscode
|
.vscode
|
||||||
_site/
|
_site/
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
- repo: https://github.com/PyCQA/pylint
|
- repo: https://github.com/PyCQA/pylint
|
||||||
rev: v3.3.0
|
rev: v2.17.4
|
||||||
hooks:
|
hooks:
|
||||||
- id: pylint
|
- id: pylint
|
||||||
- repo: https://github.com/pre-commit/mirrors-mypy
|
- repo: https://github.com/pre-commit/mirrors-mypy
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[MASTER]
|
[MASTER]
|
||||||
init-hook="from pylint.config import find_default_config_files; import sys; sys.path.append(next(find_default_config_files()).parent.as_posix())"
|
init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))"
|
||||||
|
|
||||||
[TYPECHECK]
|
[TYPECHECK]
|
||||||
|
|
||||||
@@ -12,4 +12,3 @@ generated-members=numpy.*, torch.*
|
|||||||
disable=missing-function-docstring, line-too-long, import-error,
|
disable=missing-function-docstring, line-too-long, import-error,
|
||||||
too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
|
too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
|
||||||
too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
|
too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
|
||||||
too-many-positional-arguments, possibly-used-before-assignment
|
|
||||||
|
|||||||
@@ -478,7 +478,7 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
|
|||||||
|
|
||||||
- model
|
- model
|
||||||
```yaml
|
```yaml
|
||||||
base_model: ./llama-7b-hf # local or huggingface repo
|
base_model: ./llama-7b-hf/ # local or huggingface repo
|
||||||
```
|
```
|
||||||
Note: The code will load the right architecture.
|
Note: The code will load the right architecture.
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
|
|||||||
ENV GITHUB_REF="{{ GITHUB_REF }}"
|
ENV GITHUB_REF="{{ GITHUB_REF }}"
|
||||||
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
ENV GITHUB_SHA="{{ GITHUB_SHA }}"
|
||||||
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
|
||||||
ENV HF_HOME="{{ HF_HOME }}"
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
|
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
|
||||||
|
|||||||
@@ -5,6 +5,6 @@ python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
|
|||||||
|
|
||||||
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
|
pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
|
||||||
# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
|
# pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
|
||||||
pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
|
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
|
||||||
pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
|
pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
|
||||||
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
|
pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ df_args = {
|
|||||||
"CUDA": os.environ.get("CUDA", "121"),
|
"CUDA": os.environ.get("CUDA", "121"),
|
||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dockerfile_contents = df_template.render(**df_args)
|
dockerfile_contents = df_template.render(**df_args)
|
||||||
@@ -49,12 +48,6 @@ cicd_image = (
|
|||||||
|
|
||||||
app = App("Axolotl CI/CD", secrets=[])
|
app = App("Axolotl CI/CD", secrets=[])
|
||||||
|
|
||||||
hf_cache_volume = modal.Volume.from_name(
|
|
||||||
"axolotl-ci-hf-hub-cache", create_if_missing=True
|
|
||||||
)
|
|
||||||
VOLUME_CONFIG = {
|
|
||||||
"/workspace/data/huggingface-cache/hub": hf_cache_volume,
|
|
||||||
}
|
|
||||||
|
|
||||||
N_GPUS = int(os.environ.get("N_GPUS", 2))
|
N_GPUS = int(os.environ.get("N_GPUS", 2))
|
||||||
GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
|
GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
|
||||||
@@ -74,7 +67,6 @@ def run_cmd(cmd: str, run_folder: str):
|
|||||||
timeout=60 * 60,
|
timeout=60 * 60,
|
||||||
cpu=8.0,
|
cpu=8.0,
|
||||||
memory=131072 * N_GPUS,
|
memory=131072 * N_GPUS,
|
||||||
volumes=VOLUME_CONFIG,
|
|
||||||
)
|
)
|
||||||
def cicd_pytest():
|
def cicd_pytest():
|
||||||
run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")
|
run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ df_args = {
|
|||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
|
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
|
||||||
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
dockerfile_contents = df_template.render(**df_args)
|
dockerfile_contents = df_template.render(**df_args)
|
||||||
@@ -51,12 +50,6 @@ cicd_image = (
|
|||||||
|
|
||||||
app = App("Axolotl CI/CD", secrets=[])
|
app = App("Axolotl CI/CD", secrets=[])
|
||||||
|
|
||||||
hf_cache_volume = modal.Volume.from_name(
|
|
||||||
"axolotl-ci-hf-hub-cache", create_if_missing=True
|
|
||||||
)
|
|
||||||
VOLUME_CONFIG = {
|
|
||||||
"/workspace/data/huggingface-cache/hub": hf_cache_volume,
|
|
||||||
}
|
|
||||||
|
|
||||||
N_GPUS = int(os.environ.get("N_GPUS", 1))
|
N_GPUS = int(os.environ.get("N_GPUS", 1))
|
||||||
GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)
|
GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)
|
||||||
@@ -76,7 +69,6 @@ def run_cmd(cmd: str, run_folder: str):
|
|||||||
timeout=60 * 60,
|
timeout=60 * 60,
|
||||||
cpu=8.0,
|
cpu=8.0,
|
||||||
memory=131072,
|
memory=131072,
|
||||||
volumes=VOLUME_CONFIG,
|
|
||||||
)
|
)
|
||||||
def cicd_pytest():
|
def cicd_pytest():
|
||||||
run_cmd("./cicd/cicd.sh", "/workspace/axolotl")
|
run_cmd("./cicd/cicd.sh", "/workspace/axolotl")
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
{
|
|
||||||
"zero_optimization": {
|
|
||||||
"stage": 1,
|
|
||||||
"overlap_comm": true
|
|
||||||
},
|
|
||||||
"bf16": {
|
|
||||||
"enabled": "auto"
|
|
||||||
},
|
|
||||||
"fp16": {
|
|
||||||
"enabled": "auto",
|
|
||||||
"auto_cast": false,
|
|
||||||
"loss_scale": 0,
|
|
||||||
"initial_scale_power": 32,
|
|
||||||
"loss_scale_window": 1000,
|
|
||||||
"hysteresis": 2,
|
|
||||||
"min_loss_scale": 1
|
|
||||||
},
|
|
||||||
"compile": {
|
|
||||||
"disable": false,
|
|
||||||
"backend": "inductor"
|
|
||||||
},
|
|
||||||
"gradient_accumulation_steps": "auto",
|
|
||||||
"gradient_clipping": "auto",
|
|
||||||
"train_batch_size": "auto",
|
|
||||||
"train_micro_batch_size_per_gpu": "auto",
|
|
||||||
"wall_clock_breakdown": false
|
|
||||||
}
|
|
||||||
@@ -127,40 +127,34 @@ datasets:
|
|||||||
# - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
|
# - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
|
||||||
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
|
# - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
|
||||||
chat_template: tokenizer_default
|
chat_template: tokenizer_default
|
||||||
|
# Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
|
||||||
# Custom jinja chat template. Used only if `chat_template: jinja` or empty.
|
|
||||||
chat_template_jinja:
|
chat_template_jinja:
|
||||||
|
# The key in the data example that contains the messages. Default is "messages".
|
||||||
# Key containing the messages (default: "messages")
|
|
||||||
field_messages: messages
|
field_messages: messages
|
||||||
# Key for role in each message (default: "role")
|
# The key in the message turn that contains the role. Default is "role".
|
||||||
message_field_role: role
|
message_field_role: role
|
||||||
# Key for content in each message (default: "content")
|
# The key in the message turn that contains the content. Default is "content".
|
||||||
message_field_content: content
|
message_field_content: content
|
||||||
|
# Optional[Dict[str, List]]. Roles mapping for the messages.
|
||||||
# Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
|
|
||||||
roles:
|
roles:
|
||||||
user: ["human", "user"]
|
user: ["human", "user"]
|
||||||
assistant: ["gpt", "assistant"]
|
assistant: ["gpt", "assistant", "ai"]
|
||||||
system: ["system"]
|
system: ["system"]
|
||||||
tool: ["tool"]
|
|
||||||
|
|
||||||
# IMPORTANT: The following fields determine which parts of the conversation to train on.
|
## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
|
||||||
# Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
|
|
||||||
# See examples at `docs/dataset-formats/conversation.qmd`
|
|
||||||
# Note: If the below 4 fields are empty, defaults to training only on the last message.
|
|
||||||
|
|
||||||
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
|
# Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
|
||||||
roles_to_train: ["assistant"] # default
|
roles_to_train: ["gpt", "assistant"]
|
||||||
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
|
# Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
|
||||||
# - all: train on all EOS tokens
|
# - all: train on all EOS tokens
|
||||||
# - turn (default): train on the EOS token at the end of each trainable turn
|
# - turn: train on the EOS token at the end of each trainable turn
|
||||||
# - last: train on the last EOS token in the conversation
|
# - last: train on the last EOS token in the conversation
|
||||||
train_on_eos: last
|
train_on_eos: last
|
||||||
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
|
# The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
|
||||||
message_field_training: training
|
message_field_training: training
|
||||||
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
|
# The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
|
||||||
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
|
# The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
|
||||||
|
# See example at `docs/dataset-formats/conversation.qmd`
|
||||||
message_field_training_detail: train_detail
|
message_field_training_detail: train_detail
|
||||||
|
|
||||||
|
|
||||||
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
|
|||||||
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
|
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
|
||||||
sample_packing_bin_size: 200
|
sample_packing_bin_size: 200
|
||||||
|
|
||||||
# Use batch flattening for speedups when not using sample_packing
|
|
||||||
batch_flattening:
|
|
||||||
|
|
||||||
# Passed through to transformers when loading the model when launched without accelerate
|
# Passed through to transformers when loading the model when launched without accelerate
|
||||||
# Use `sequential` when training w/ model parallelism to limit memory
|
# Use `sequential` when training w/ model parallelism to limit memory
|
||||||
device_map:
|
device_map:
|
||||||
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
|
|||||||
output_dir: ./completed-model
|
output_dir: ./completed-model
|
||||||
|
|
||||||
# Whether to use torch.compile and which backend to use
|
# Whether to use torch.compile and which backend to use
|
||||||
# setting to `auto` will enable torch compile when torch>=2.5.1
|
torch_compile: # bool
|
||||||
torch_compile: # Optional[Union[Literal["auto"], bool]]
|
|
||||||
torch_compile_backend: # Optional[str]
|
torch_compile_backend: # Optional[str]
|
||||||
|
|
||||||
# Training hyperparameters
|
# Training hyperparameters
|
||||||
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
|
|||||||
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
|
eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
|
||||||
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
|
eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
|
||||||
|
|
||||||
profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
|
|
||||||
# see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
|
|
||||||
# snapshots can be visualized @ https://pytorch.org/memory_viz
|
|
||||||
|
|
||||||
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
|
loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
|
||||||
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
|
loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
|
||||||
|
|
||||||
|
|||||||
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
|
|||||||
datasets:
|
datasets:
|
||||||
- path: ...
|
- path: ...
|
||||||
type: chat_template
|
type: chat_template
|
||||||
roles_to_train:
|
|
||||||
train_on_eos:
|
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
|
2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
|
||||||
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
|
|||||||
datasets:
|
datasets:
|
||||||
- path: ...
|
- path: ...
|
||||||
type: chat_template
|
type: chat_template
|
||||||
roles_to_train: ["assistant"] # default value
|
roles_to_train: ["assistant"]
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
|
3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
|
||||||
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
|
|||||||
datasets:
|
datasets:
|
||||||
- path: ...
|
- path: ...
|
||||||
type: chat_template
|
type: chat_template
|
||||||
|
roles_to_train: ["assistant"]
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
|
4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
|
||||||
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
|
|||||||
datasets:
|
datasets:
|
||||||
- path: ...
|
- path: ...
|
||||||
type: chat_template
|
type: chat_template
|
||||||
|
roles_to_train: ["assistant"]
|
||||||
```
|
```
|
||||||
|
|
||||||
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
|
5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
|
||||||
|
|||||||
@@ -19,14 +19,7 @@ For pretraining, there is no prompt template or roles. The only required field
|
|||||||
Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:
|
Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:
|
||||||
|
|
||||||
```{.yaml filename="config.yaml"}
|
```{.yaml filename="config.yaml"}
|
||||||
pretraining_dataset:
|
pretraining_dataset: # hf path only
|
||||||
- name:
|
|
||||||
path:
|
|
||||||
split:
|
|
||||||
text_column: # column in dataset with the data, usually `text`
|
|
||||||
type: pretrain
|
|
||||||
trust_remote_code:
|
|
||||||
skip: # number of rows of data to skip over from the beginning
|
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: cerebras/btlm-3b-8k-base
|
base_model: cerebras/btlm-3b-8k-base
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: GPT2Tokenizer
|
tokenizer_type: GPT2Tokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
tokenizer_use_fast: true
|
tokenizer_use_fast: true
|
||||||
tokenizer_legacy: true
|
tokenizer_legacy: true
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: cerebras/Cerebras-GPT-1.3B
|
base_model: cerebras/Cerebras-GPT-1.3B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-13b-hf
|
base_model: codellama/CodeLlama-13b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-13b-hf
|
base_model: codellama/CodeLlama-13b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-34b-hf
|
base_model: codellama/CodeLlama-34b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-34b-hf
|
base_model: codellama/CodeLlama-34b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-7b-hf
|
base_model: codellama/CodeLlama-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: codellama/CodeLlama-7b-hf
|
base_model: codellama/CodeLlama-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: CodeLlamaTokenizer
|
tokenizer_type: CodeLlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: LnL-AI/dbrx-base-converted-v2
|
base_model: LnL-AI/dbrx-base-converted-v2
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: LnL-AI/dbrx-base-converted-v2
|
base_model: LnL-AI/dbrx-base-converted-v2
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: LnL-AI/dbrx-base-converted-v2
|
base_model: LnL-AI/dbrx-base-converted-v2
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: deepseek-ai/DeepSeek-V2-Lite
|
base_model: deepseek-ai/DeepSeek-V2-Lite
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
|
base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,12 +1,7 @@
|
|||||||
base_model: tiiuae/falcon-7b
|
base_model: tiiuae/falcon-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
trust_remote_code: true
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,15 +1,10 @@
|
|||||||
# 1b: tiiuae/falcon-rw-1b
|
# 1b: tiiuae/falcon-rw-1b
|
||||||
# 40b: tiiuae/falcon-40b
|
# 40b: tiiuae/falcon-40b
|
||||||
base_model: tiiuae/falcon-7b
|
base_model: tiiuae/falcon-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
|
||||||
tokenizer_type: AutoTokenizer
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
model_type: AutoModelForCausalLM
|
||||||
|
tokenizer_type: AutoTokenizer
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
# enable 4bit for QLoRA
|
# enable 4bit for QLoRA
|
||||||
|
|||||||
@@ -1,12 +1,7 @@
|
|||||||
base_model: tiiuae/falcon-7b
|
base_model: tiiuae/falcon-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
trust_remote_code: true
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
# use google/gemma-7b if you have access
|
# use google/gemma-7b if you have access
|
||||||
base_model: mhenrichsen/gemma-7b
|
base_model: mhenrichsen/gemma-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: google/gemma-2-9b
|
base_model: google/gemma-2-9b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: google/gemma-2-2b
|
base_model: google/gemma-2-2b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForSequenceClassification
|
model_type: AutoModelForSequenceClassification
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: EleutherAI/gpt-j-6b
|
base_model: EleutherAI/gpt-j-6b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: ai21labs/Jamba-v0.1
|
base_model: ai21labs/Jamba-v0.1
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: ai21labs/Jamba-v0.1
|
base_model: ai21labs/Jamba-v0.1
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
base_model: ai21labs/AI21-Jamba-1.5-Large
|
base_model: ai21labs/AI21-Jamba-1.5-Large
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: huggyllama/llama-7b
|
base_model: huggyllama/llama-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
- path: openaccess-ai-collective/jeopardy
|
- path: openaccess-ai-collective/jeopardy
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,13 +1,8 @@
|
|||||||
base_model: TheBloke/Llama-2-7B-GPTQ
|
base_model: TheBloke/Llama-2-7B-GPTQ
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
|
||||||
tokenizer_type: LlamaTokenizer
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
gptq: true
|
gptq: true
|
||||||
gptq_disable_exllama: true
|
gptq_disable_exllama: true
|
||||||
|
model_type: AutoModelForCausalLM
|
||||||
|
tokenizer_type: LlamaTokenizer
|
||||||
tokenizer_use_fast: true
|
tokenizer_use_fast: true
|
||||||
tokenizer_legacy: true
|
tokenizer_legacy: true
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Llama-2-7b-hf
|
base_model: NousResearch/Llama-2-7b-hf
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
|
base_model: alpindale/Llama-3.2-11B-Vision-Instruct
|
||||||
# optionally might have model_type or tokenizer_type or processor_type
|
|
||||||
processor_type: AutoProcessor
|
processor_type: AutoProcessor
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
strict: false
|
strict: false
|
||||||
|
|
||||||
# these 3 lines are needed for now to handle vision chat templates w images
|
# these 3 lines are needed for now to handle vision chat templates w images
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
plugins:
|
plugins:
|
||||||
- axolotl.integrations.liger.LigerPlugin
|
- axolotl.integrations.liger.LigerPlugin
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3.1-8B
|
base_model: NousResearch/Meta-Llama-3.1-8B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
base_model: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B-Instruct
|
base_model: NousResearch/Meta-Llama-3-8B-Instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Llama-3.2-1B
|
base_model: NousResearch/Llama-3.2-1B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B
|
base_model: NousResearch/Meta-Llama-3-8B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: meta-llama/Llama-3.2-1B
|
base_model: meta-llama/Llama-3.2-1B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: NousResearch/Llama-3.2-1B
|
base_model: NousResearch/Llama-3.2-1B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
|
base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: casperhansen/llama-3-70b-fp16
|
base_model: casperhansen/llama-3-70b-fp16
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
|
tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: NousResearch/Meta-Llama-3-8B
|
base_model: NousResearch/Meta-Llama-3-8B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
base_model: state-spaces/mamba-2.8b
|
base_model: state-spaces/mamba-2.8b
|
||||||
# optionally might have model_type or tokenizer_type or tokenizer_config
|
|
||||||
model_type: MambaLMHeadModel
|
model_type: MambaLMHeadModel
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
tokenizer_config: EleutherAI/gpt-neox-20b
|
tokenizer_config: EleutherAI/gpt-neox-20b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistralai/Mistral-7B-v0.1
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistralai/Mistral-7B-v0.1
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistralai/Mistral-7B-v0.1
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -4,11 +4,8 @@
|
|||||||
#face problems with the special tokens.
|
#face problems with the special tokens.
|
||||||
|
|
||||||
base_model: mistralai/Mistral-7B-Instruct-v0.2
|
base_model: mistralai/Mistral-7B-Instruct-v0.2
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistralai/Mistral-7B-v0.1
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: mistralai/Mixtral-8x7B-v0.1
|
base_model: mistralai/Mixtral-8x7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: mistral-community/Mixtral-8x22B-v0.1
|
base_model: mistral-community/Mixtral-8x22B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: mistralai/Mistral-7B-v0.1
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: MistralForCausalLM
|
model_type: MistralForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,5 @@
|
|||||||
base_model: mosaicml/mpt-7b
|
base_model: mosaicml/mpt-7b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
|
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: openlm-research/open_llama_3b_v2
|
base_model: openlm-research/open_llama_3b_v2
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: openlm-research/open_llama_3b_v2
|
base_model: openlm-research/open_llama_3b_v2
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: openlm-research/open_llama_3b_v2
|
base_model: openlm-research/open_llama_3b_v2
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
strict: false
|
strict: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: microsoft/Phi-3.5-mini-instruct
|
base_model: microsoft/Phi-3.5-mini-instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: microsoft/phi-1_5
|
base_model: microsoft/phi-1_5
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: microsoft/phi-1_5
|
base_model: microsoft/phi-1_5
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: microsoft/phi-2
|
base_model: microsoft/phi-2
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: microsoft/Phi-3-mini-4k-instruct
|
base_model: microsoft/Phi-3-mini-4k-instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,11 +1,7 @@
|
|||||||
base_model: microsoft/Phi-3-mini-4k-instruct
|
base_model: microsoft/Phi-3-mini-4k-instruct
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
chat_template: phi_3
|
chat_template: phi_3
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,11 +1,7 @@
|
|||||||
base_model: EleutherAI/pythia-12b-deduped
|
base_model: EleutherAI/pythia-12b-deduped
|
||||||
base_model_ignore_patterns: pytorch* # prefer safetensors
|
base_model_ignore_patterns: pytorch* # prefer safetensors
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: GPTNeoXForCausalLM
|
model_type: GPTNeoXForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
gptq: false
|
gptq: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: EleutherAI/pythia-1.4b-deduped
|
base_model: EleutherAI/pythia-1.4b-deduped
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
datasets:
|
datasets:
|
||||||
- path: teknium/GPT4-LLM-Cleaned
|
- path: teknium/GPT4-LLM-Cleaned
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: Qwen/Qwen-7B
|
base_model: Qwen/Qwen-7B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: Qwen/Qwen-7B
|
base_model: Qwen/Qwen-7B
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
base_model: Qwen/Qwen1.5-MoE-A2.7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: Qwen/Qwen2.5-0.5B
|
base_model: Qwen/Qwen2.5-0.5B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
strict: false
|
strict: false
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: Qwen/Qwen2-7B
|
base_model: Qwen/Qwen2-7B
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
|
base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: GPTNeoXForCausalLM
|
model_type: GPTNeoXForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code:
|
trust_remote_code:
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
base_model: replit/replit-code-v1-3b
|
base_model: replit/replit-code-v1-3b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
datasets:
|
datasets:
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: stabilityai/stablelm-2-1_6b
|
base_model: stabilityai/stablelm-2-1_6b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
base_model: stabilityai/stablelm-2-1_6b
|
base_model: stabilityai/stablelm-2-1_6b
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: AutoModelForCausalLM
|
model_type: AutoModelForCausalLM
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
base_model: bigcode/starcoder2-3b
|
base_model: bigcode/starcoder2-3b
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: false
|
load_in_8bit: false
|
||||||
load_in_4bit: true
|
load_in_4bit: true
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
base_model: TinyLlama/TinyLlama_v1.1
|
base_model: TinyLlama/TinyLlama_v1.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
model_type: LlamaForCausalLM
|
model_type: LlamaForCausalLM
|
||||||
tokenizer_type: LlamaTokenizer
|
tokenizer_type: LlamaTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
base_model: TinyLlama/TinyLlama_v1.1
|
base_model: TinyLlama/TinyLlama_v1.1
|
||||||
# optionally might have model_type or tokenizer_type
|
|
||||||
tokenizer_type: AutoTokenizer
|
tokenizer_type: AutoTokenizer
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_8bit: true
|
load_in_8bit: true
|
||||||
load_in_4bit: false
|
load_in_4bit: false
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user