Update README.md

Quick fix. Local `base_model` paths need to have a trailing `/`.
2024-12-12 15:01:29 -05:00
201 changed files with 2501 additions and 4014 deletions
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,7 +1,6 @@
 name: lint
 on:
  # check on PRs, and manual triggers
  merge_group:
  pull_request:
      paths:
       - '**.py'
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,6 +25,7 @@ jobs:
            python_version: "3.11"
            pytorch: 2.3.1
            axolotl_extras: mamba-ssm
            is_latest: true
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
@@ -35,7 +36,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            axolotl_extras:
            is_latest: true
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
@@ -92,6 +92,7 @@ jobs:
            python_version: "3.11"
            pytorch: 2.3.1
            axolotl_extras:
            is_latest: true
          - cuda: 124
            cuda_version: 12.4.1
            python_version: "3.11"
@@ -102,7 +103,6 @@ jobs:
            python_version: "3.11"
            pytorch: 2.5.1
            axolotl_extras:
            is_latest: true
    runs-on: axolotl-gpu-runner
    steps:
      - name: Checkout
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -52,7 +52,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -129,7 +129,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,7 +1,6 @@
 name: Tests
 on:
  # check on push/merge to main, PRs, and manual triggers
  merge_group:
  push:
    branches:
      - "main"
@@ -61,15 +60,6 @@ jobs:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Restore HF cache
        id: hf-cache-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
@@ -110,15 +100,6 @@ jobs:
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
      - name: Save HF cache
        id: hf-cache
        uses: actions/cache/save@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
  pytest-sdist:
    name: PyTest from Source Dist
    runs-on: ubuntu-latest
@@ -134,15 +115,6 @@ jobs:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Restore HF cache
        id: hf-cache-restore
        uses: actions/cache/restore@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ runner.os }}-hf-hub-cache-${{ hashFiles('**/conftest.py') }}
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
@@ -184,15 +156,6 @@ jobs:
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
      - name: Save HF cache
        id: hf-cache
        uses: actions/cache/save@v4
        with:
          path: |
            /home/runner/.cache/huggingface/hub/datasets--*
            /home/runner/.cache/huggingface/hub/models--*
          key: ${{ steps.hf-cache-restore.outputs.cache-primary-key }}
  docker-e2e-tests-1st:
    if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
    # this job needs to be run on self-hosted GPU runners...
@@ -220,7 +183,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
@@ -266,7 +229,7 @@ jobs:
      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
-          pip install modal==0.71.8 jinja2
+          pip install modal==0.63.64 jinja2
      - name: Update env vars
        run: |
          echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 **/axolotl.egg-info
 configs
 last_run_prepared/
 outputs
 .vscode
 _site/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
    hooks:
    - id: flake8
 -   repo: https://github.com/PyCQA/pylint
-    rev: v3.3.0
+    rev: v2.17.4
    hooks:
    - id: pylint
 -   repo: https://github.com/pre-commit/mirrors-mypy
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,5 +1,5 @@
 [MASTER]
-init-hook="from pylint.config import find_default_config_files; import sys; sys.path.append(next(find_default_config_files()).parent.as_posix())"
+init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))"
 [TYPECHECK]
@@ -12,4 +12,3 @@ generated-members=numpy.*, torch.*
 disable=missing-function-docstring, line-too-long, import-error,
    too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods,
    too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation,
    too-many-positional-arguments, possibly-used-before-assignment
--- a/README.md
+++ b/README.md
@@ -478,7 +478,7 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
 - model
  ```yaml
-  base_model: ./llama-7b-hf # local or huggingface repo
+  base_model: ./llama-7b-hf/  # local path (must end with a trailing `/`) or Hugging Face repo ID
  ```
  Note: The code will load the right architecture.
--- a/cicd/Dockerfile.jinja
+++ b/cicd/Dockerfile.jinja
@@ -8,7 +8,6 @@ ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}"
 ENV GITHUB_REF="{{ GITHUB_REF }}"
 ENV GITHUB_SHA="{{ GITHUB_SHA }}"
 ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}"
 ENV HF_HOME="{{ HF_HOME }}"
 RUN apt-get update && \
    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
--- a/cicd/cicd.sh
+++ b/cicd/cicd.sh
@@ -5,6 +5,6 @@ python -c "import torch; assert '$PYTORCH_VERSION' in torch.__version__"
 pytest -v --durations=10 -n8 --ignore=tests/e2e/ --ignore=tests/patched/ /workspace/axolotl/tests/
 # pytest -v --durations=10 -n8 --dist loadfile /workspace/axolotl/tests/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/patched/
+pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/patched/
-pytest -v --durations=10 /workspace/axolotl/tests/e2e/integrations/
+pytest -v --durations=10 -n1 --dist loadfile /workspace/axolotl/tests/e2e/integrations/
 pytest -v --durations=10 --ignore=tests/e2e/patched/ --ignore=tests/e2e/multigpu/ --ignore=tests/e2e/integrations/ /workspace/axolotl/tests/e2e/
--- a/cicd/multigpu.py
+++ b/cicd/multigpu.py
@@ -28,7 +28,6 @@ df_args = {
    "CUDA": os.environ.get("CUDA", "121"),
    "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
    "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
    "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }
 dockerfile_contents = df_template.render(**df_args)
@@ -49,12 +48,6 @@ cicd_image = (
 app = App("Axolotl CI/CD", secrets=[])
 hf_cache_volume = modal.Volume.from_name(
    "axolotl-ci-hf-hub-cache", create_if_missing=True
 )
 VOLUME_CONFIG = {
    "/workspace/data/huggingface-cache/hub": hf_cache_volume,
 }
 N_GPUS = int(os.environ.get("N_GPUS", 2))
 GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
@@ -74,7 +67,6 @@ def run_cmd(cmd: str, run_folder: str):
    timeout=60 * 60,
    cpu=8.0,
    memory=131072 * N_GPUS,
    volumes=VOLUME_CONFIG,
 )
 def cicd_pytest():
    run_cmd("./cicd/multigpu.sh", "/workspace/axolotl")
--- a/cicd/tests.py
+++ b/cicd/tests.py
@@ -29,7 +29,6 @@ df_args = {
    "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
    "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
    "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
    "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }
 dockerfile_contents = df_template.render(**df_args)
@@ -51,12 +50,6 @@ cicd_image = (
 app = App("Axolotl CI/CD", secrets=[])
 hf_cache_volume = modal.Volume.from_name(
    "axolotl-ci-hf-hub-cache", create_if_missing=True
 )
 VOLUME_CONFIG = {
    "/workspace/data/huggingface-cache/hub": hf_cache_volume,
 }
 N_GPUS = int(os.environ.get("N_GPUS", 1))
 GPU_CONFIG = modal.gpu.A10G(count=N_GPUS)
@@ -76,7 +69,6 @@ def run_cmd(cmd: str, run_folder: str):
    timeout=60 * 60,
    cpu=8.0,
    memory=131072,
    volumes=VOLUME_CONFIG,
 )
 def cicd_pytest():
    run_cmd("./cicd/cicd.sh", "/workspace/axolotl")
--- a/deepspeed_configs/zero1_torch_compile.json
+++ b/deepspeed_configs/zero1_torch_compile.json
@@ -1,27 +0,0 @@
 {
  "zero_optimization": {
    "stage": 1,
    "overlap_comm": true
  },
  "bf16": {
    "enabled": "auto"
  },
  "fp16": {
    "enabled": "auto",
    "auto_cast": false,
    "loss_scale": 0,
    "initial_scale_power": 32,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "compile": {
    "disable": false,
    "backend": "inductor"
  },
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": false
 }
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -127,40 +127,34 @@ datasets:
    # - tokenizer_default_fallback_*: where * is the name of the chat template to fallback to if the tokenizer does not have a chat template else default to tokenizer. E.g. tokenizer_default_fallback_chatml.
    # - jinja: Uses a custom jinja template for the chat template. The custom jinja template should be provided in the chat_template_jinja field.
    chat_template: tokenizer_default
-
+    # Custom jinja template for chat template. This will be only used if `chat_template` is set to `jinja` or empty (in which case chat_template is automatically set to `jinja`).
    # Custom jinja chat template. Used only if `chat_template: jinja` or empty.
    chat_template_jinja:
-
+    # The key in the data example that contains the messages. Default is "messages".
    # Key containing the messages (default: "messages")
    field_messages: messages
-    # Key for role in each message (default: "role")
+    # The key in the message turn that contains the role. Default is "role".
    message_field_role: role
-    # Key for content in each message (default:  "content")
+    # The key in the message turn that contains the content. Default is "content".
    message_field_content: content
-
+    # Optional[Dict[str, List]]. Roles mapping for the messages.
    # Optional[Dict[str, List]]. Roles mapping in the messages. The default is:
    roles:
      user: ["human", "user"]
-      assistant: ["gpt", "assistant"]
+      assistant: ["gpt", "assistant", "ai"]
      system: ["system"]
      tool: ["tool"]
-    # IMPORTANT: The following fields determine which parts of the conversation to train on.
+    ## NOTE: Leaving the below empty will default to using the simple legacy tokenization strategy where only last message is trained on.
    # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
    # See examples at `docs/dataset-formats/conversation.qmd`
    # Note: If the below 4 fields are empty, defaults to training only on the last message.
    # Optional[List[str]]. Roles to train on. The tokens from these roles will be considered for the loss.
-    roles_to_train: ["assistant"]  # default
+    roles_to_train: ["gpt", "assistant"]
    # Optional[str]. Which EOS tokens to train on in the conversation. Possible values are:
    # - all: train on all EOS tokens
-    # - turn (default): train on the EOS token at the end of each trainable turn
+    # - turn: train on the EOS token at the end of each trainable turn
    # - last: train on the last EOS token in the conversation
    train_on_eos: last
    # The key in the message turn that indicates via boolean whether tokens of a turn should be considered for training. Useful to selectively train on certain turns besides the `roles_to_train`.
    message_field_training: training
    # The key in the message turn that contains the training details. Useful to selectively train on certain tokens in a turn.
    # The value of the key is a List[Dict] containing `begin_offset` (start character index in content), `end_offset` (end character index in content), and `train` (boolean whether to train).
    # See example at `docs/dataset-formats/conversation.qmd`
    message_field_training_detail: train_detail
@@ -245,9 +239,6 @@ sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200
 # Use batch flattening for speedups when not using sample_packing
 batch_flattening:
 # Passed through to transformers when loading the model when launched without accelerate
 # Use `sequential` when training w/ model parallelism to limit memory
 device_map:
@@ -340,8 +331,7 @@ comet_experiment_config: # Dictionary for additional configuration settings, see
 output_dir: ./completed-model
 # Whether to use torch.compile and which backend to use
-# setting to `auto` will enable torch compile when torch>=2.5.1
+torch_compile:  # bool
 torch_compile:  # Optional[Union[Literal["auto"], bool]]
 torch_compile_backend:  # Optional[str]
 # Training hyperparameters
@@ -373,10 +363,6 @@ eval_table_size: # Approximate number of predictions sent to wandb depending on
 eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
 eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", "chrf", "perplexity"]
 profiler_steps: # enable the pytorch profiler to capture the first N steps of training to the output_dir.
                # see https://pytorch.org/blog/understanding-gpu-memory-1/ for more information
                # snapshots can be visualized @ https://pytorch.org/memory_viz
 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)
--- a/docs/dataset-formats/conversation.qmd
+++ b/docs/dataset-formats/conversation.qmd
@@ -68,8 +68,6 @@ We recommend checking the below examples for other usecases.
 datasets:
  - path: ...
    type: chat_template
    roles_to_train:
    train_on_eos:
 ```
 2. Using the `gemma` chat template to override the tokenizer_config.json's chat template on OpenAI messages format, training on all assistant messages.
@@ -79,7 +77,7 @@ chat_template: gemma # this overwrites the tokenizer's chat_template
 datasets:
  - path: ...
    type: chat_template
-    roles_to_train: ["assistant"]  # default value
+    roles_to_train: ["assistant"]
 ```
 3. Using the tokenizer_config.json's chat template or `chatml` as fallback if the former's chat template does not exist, on OpenAI messages format, training on all assistant messages.
@@ -89,6 +87,7 @@ chat_template: tokenizer_default_fallback_chatml # this overwrites the tokenizer
 datasets:
  - path: ...
    type: chat_template
    roles_to_train: ["assistant"]
 ```
 4. Using a custom jinja template on OpenAI messages format, training on all assistant messages.
@@ -100,6 +99,7 @@ chat_template_jinja: "{{ bos_token }}{% for message in messages %}{% if (message
 datasets:
  - path: ...
    type: chat_template
    roles_to_train: ["assistant"]
 ```
 5. (Advanced) Using fine-grained control over tokens and turns to train in a conversation
--- a/docs/dataset-formats/pretraining.qmd
+++ b/docs/dataset-formats/pretraining.qmd
@@ -19,14 +19,7 @@ For pretraining, there is no prompt template or roles.  The only required field
 Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming:
 ```{.yaml filename="config.yaml"}
-pretraining_dataset:
+pretraining_dataset: # hf path only
  - name:
    path:
    split:
    text_column: # column in dataset with the data, usually `text`
    type: pretrain
    trust_remote_code:
    skip: # number of rows of data to skip over from the beginning
 ...
 ```
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -1,10 +1,6 @@
 base_model: cerebras/btlm-3b-8k-base
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -1,7 +1,4 @@
 base_model: cerebras/Cerebras-GPT-1.3B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-13b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-34b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -1,9 +1,6 @@
 base_model: codellama/CodeLlama-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: true
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -1,7 +1,4 @@
 base_model: LnL-AI/dbrx-base-converted-v2
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -1,6 +1,4 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -1,7 +1,4 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -1,15 +1,10 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
-
+model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 load_in_8bit: false
 # enable 4bit for QLoRA
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -1,12 +1,7 @@
 base_model: tiiuae/falcon-7b
-# optionally might have model_type or tokenizer_type
+trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -1,10 +1,7 @@
 # use google/gemma-7b if you have access
 base_model: mhenrichsen/gemma-7b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-9b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -1,9 +1,6 @@
 base_model: google/gemma-2-2b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForSequenceClassification
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/gpt-j-6b
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -1,7 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -1,6 +1,4 @@
 base_model: ai21labs/Jamba-v0.1
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -1,8 +1,5 @@
 base_model: ai21labs/AI21-Jamba-1.5-Large
 # optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_4bit: true
 strict: false
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -1,10 +1,6 @@
 base_model: huggyllama/llama-7b
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 datasets:
  - path: openaccess-ai-collective/jeopardy
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -1,13 +1,8 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 gptq: true
 gptq_disable_exllama: true
-
+model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 tokenizer_use_fast: true
 tokenizer_legacy: true
 load_in_8bit: false
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Llama-2-7b-hf
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3-vision/lora-11b.yaml
+++ b/examples/llama-3-vision/lora-11b.yaml
@@ -1,9 +1,5 @@
 base_model: alpindale/Llama-3.2-11B-Vision-Instruct
 # optionally might have model_type or tokenizer_type or processor_type
 processor_type: AutoProcessor
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 strict: false
 # these 3 lines are needed for now to handle vision chat templates w images
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 plugins:
  - axolotl.integrations.liger.LigerPlugin
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B-Instruct
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -1,9 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/llama-3/qlora-1b-kto.yaml
+++ b/examples/llama-3/qlora-1b-kto.yaml
@@ -1,6 +1,4 @@
 base_model: meta-llama/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -1,6 +1,4 @@
 base_model: NousResearch/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -1,8 +1,5 @@
 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
 # optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_4bit: true
 strict: false
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -1,9 +1,6 @@
 base_model: casperhansen/llama-3-70b-fp16
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer  # PreTrainedTokenizerFast
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -1,9 +1,6 @@
 base_model: NousResearch/Meta-Llama-3-8B
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -1,10 +1,7 @@
 base_model: state-spaces/mamba-2.8b
 # optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -4,11 +4,8 @@
 #face problems with the special tokens.
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -1,10 +1,6 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -1,10 +1,6 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -1,9 +1,6 @@
 base_model: mistralai/Mistral-7B-v0.1
 # optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -1,9 +1,5 @@
 base_model: mosaicml/mpt-7b
 # optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true  # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
 strict: false
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -1,10 +1,6 @@
 base_model: openlm-research/open_llama_3b_v2
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
 strict: false
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3.5-mini-instruct
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-1_5
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/phi-2
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -1,9 +1,6 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -1,11 +1,7 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
 # optionally might have model_type or tokenizer_type
 trust_remote_code: true
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 chat_template: phi_3
 load_in_8bit: false
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -1,11 +1,7 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch*  # prefer safetensors
 # optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: false
 gptq: false
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -1,7 +1,4 @@
 base_model: EleutherAI/pythia-1.4b-deduped
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 datasets:
  - path: teknium/GPT4-LLM-Cleaned
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -1,9 +1,6 @@
 base_model: Qwen/Qwen-7B
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -1,6 +1,4 @@
 base_model: Qwen/Qwen2.5-0.5B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 strict: false
--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -1,7 +1,4 @@
 base_model: Qwen/Qwen2-7B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -1,10 +1,6 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
 # optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code:
 load_in_8bit: false
 datasets:
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -1,7 +1,4 @@
 base_model: replit/replit-code-v1-3b
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
 datasets:
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -1,10 +1,6 @@
 base_model: stabilityai/stablelm-2-1_6b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: false
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -1,10 +1,6 @@
 base_model: stabilityai/stablelm-2-1_6b
 # optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
 load_in_8bit: true
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -1,6 +1,4 @@
 base_model: bigcode/starcoder2-3b
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: false
 load_in_4bit: true
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -1,9 +1,6 @@
 base_model: TinyLlama/TinyLlama_v1.1
 # optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -1,8 +1,5 @@
 base_model: TinyLlama/TinyLlama_v1.1
 # optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 load_in_8bit: true
 load_in_4bit: false
--- a/Show More
+++ b/Show More