Compare commits
4 Commits
llmcompres
...
smaller-ra
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0670abc94 | ||
|
|
08f287b57f | ||
|
|
b4c7d9c29d | ||
|
|
d2637fb01d |
12
.github/workflows/base.yml
vendored
12
.github/workflows/base.yml
vendored
@@ -46,18 +46,6 @@ jobs:
|
|||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
- cuda: "126"
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
cudnn_version: ""
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
|
||||||
- cuda: "128"
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
cudnn_version: ""
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
|
||||||
- cuda: "128"
|
- cuda: "128"
|
||||||
cuda_version: 12.8.1
|
cuda_version: 12.8.1
|
||||||
cudnn_version: ""
|
cudnn_version: ""
|
||||||
|
|||||||
12
.github/workflows/main.yml
vendored
12
.github/workflows/main.yml
vendored
@@ -31,11 +31,6 @@ jobs:
|
|||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
axolotl_extras: vllm
|
axolotl_extras: vllm
|
||||||
is_latest: true
|
is_latest: true
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
axolotl_extras: vllm
|
|
||||||
runs-on: axolotl-gpu-runner
|
runs-on: axolotl-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -98,11 +93,6 @@ jobs:
|
|||||||
pytorch: 2.6.0
|
pytorch: 2.6.0
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
is_latest: true
|
is_latest: true
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
axolotl_extras:
|
|
||||||
runs-on: axolotl-gpu-runner
|
runs-on: axolotl-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
@@ -148,7 +138,7 @@ jobs:
|
|||||||
- cuda: 124
|
- cuda: 124
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.4.1
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.6.0
|
pytorch: 2.4.1
|
||||||
axolotl_extras:
|
axolotl_extras:
|
||||||
runs-on: axolotl-gpu-runner
|
runs-on: axolotl-gpu-runner
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
8
.github/workflows/multi-gpu-e2e.yml
vendored
8
.github/workflows/multi-gpu-e2e.yml
vendored
@@ -45,13 +45,6 @@ jobs:
|
|||||||
axolotl_extras: vllm
|
axolotl_extras: vllm
|
||||||
num_gpus: 2
|
num_gpus: 2
|
||||||
nightly_build: "true"
|
nightly_build: "true"
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
axolotl_extras:
|
|
||||||
num_gpus: 2
|
|
||||||
nightly_build: "true"
|
|
||||||
runs-on: [self-hosted, modal]
|
runs-on: [self-hosted, modal]
|
||||||
timeout-minutes: 120
|
timeout-minutes: 120
|
||||||
steps:
|
steps:
|
||||||
@@ -74,7 +67,6 @@ jobs:
|
|||||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||||
echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
|
echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
|
||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.multigpu
|
modal run cicd.multigpu
|
||||||
|
|||||||
1
.github/workflows/tests-nightly.yml
vendored
1
.github/workflows/tests-nightly.yml
vendored
@@ -147,7 +147,6 @@ jobs:
|
|||||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||||
echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
|
echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
|
||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.e2e_tests
|
modal run cicd.e2e_tests
|
||||||
|
|||||||
11
.github/workflows/tests.yml
vendored
11
.github/workflows/tests.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
|||||||
max-parallel: 2
|
max-parallel: 2
|
||||||
matrix:
|
matrix:
|
||||||
python_version: ["3.11"]
|
python_version: ["3.11"]
|
||||||
pytorch_version: ["2.4.1", "2.5.1", "2.6.0", "2.7.0"]
|
pytorch_version: ["2.4.1", "2.5.1", "2.6.0"]
|
||||||
timeout-minutes: 20
|
timeout-minutes: 20
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -109,7 +109,6 @@ jobs:
|
|||||||
- name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
uses: codecov/codecov-action@v5
|
uses: codecov/codecov-action@v5
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.CODECOV_TOKEN }}
|
|
||||||
files: ./coverage.xml
|
files: ./coverage.xml
|
||||||
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
flags: unittests,pytorch-${{ matrix.pytorch_version }}
|
||||||
fail_ci_if_error: false
|
fail_ci_if_error: false
|
||||||
@@ -242,7 +241,6 @@ jobs:
|
|||||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||||
echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
|
echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
|
||||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.e2e_tests
|
modal run cicd.e2e_tests
|
||||||
@@ -270,12 +268,6 @@ jobs:
|
|||||||
pytorch: 2.5.1
|
pytorch: 2.5.1
|
||||||
num_gpus: 1
|
num_gpus: 1
|
||||||
axolotl_extras: vllm
|
axolotl_extras: vllm
|
||||||
- cuda: 126
|
|
||||||
cuda_version: 12.6.3
|
|
||||||
python_version: "3.11"
|
|
||||||
pytorch: 2.7.0
|
|
||||||
num_gpus: 1
|
|
||||||
axolotl_extras:
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -296,7 +288,6 @@ jobs:
|
|||||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||||
echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
|
echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
|
||||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||||
echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
|
|
||||||
- name: Run tests job on Modal
|
- name: Run tests job on Modal
|
||||||
run: |
|
run: |
|
||||||
modal run cicd.e2e_tests
|
modal run cicd.e2e_tests
|
||||||
|
|||||||
12
cicd/cicd.sh
12
cicd/cicd.sh
@@ -9,7 +9,8 @@ pytest -v --durations=10 -n8 \
|
|||||||
--ignore=tests/patched/ \
|
--ignore=tests/patched/ \
|
||||||
--ignore=tests/cli \
|
--ignore=tests/cli \
|
||||||
/workspace/axolotl/tests/ \
|
/workspace/axolotl/tests/ \
|
||||||
--cov=axolotl
|
--cov=axolotl \
|
||||||
|
--cov-report=xml:coverage.xml
|
||||||
|
|
||||||
# Run lora kernels tests with coverage append
|
# Run lora kernels tests with coverage append
|
||||||
pytest -v --durations=10 \
|
pytest -v --durations=10 \
|
||||||
@@ -50,6 +51,11 @@ pytest -v --durations=10 \
|
|||||||
/workspace/axolotl/tests/e2e/ \
|
/workspace/axolotl/tests/e2e/ \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append \
|
--cov-append \
|
||||||
--cov-report=xml:e2e-coverage.xml
|
--cov-report=xml:coverage.xml
|
||||||
|
|
||||||
codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION}
|
# Upload coverage to Codecov
|
||||||
|
if [ -f e2e-coverage.xml ]; then
|
||||||
|
codecov -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION}
|
||||||
|
else
|
||||||
|
echo "Coverage file not found. Coverage report may have failed."
|
||||||
|
fi
|
||||||
|
|||||||
@@ -28,7 +28,6 @@ df_args = {
|
|||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
|
"NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
|
||||||
"CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
|
|
||||||
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ df_args = {
|
|||||||
"CUDA": os.environ.get("CUDA", "121"),
|
"CUDA": os.environ.get("CUDA", "121"),
|
||||||
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
"GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
|
||||||
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
"GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
|
||||||
"CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
|
|
||||||
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
"HF_HOME": "/workspace/data/huggingface-cache/hub",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,25 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
# only run one test at a time so as not to OOM the GPU
|
||||||
|
pytest -v --durations=10 -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
|
||||||
|
pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/
|
||||||
|
|
||||||
# Only run two tests at a time to avoid OOM on GPU (with coverage collection)
|
# Only run two tests at a time to avoid OOM on GPU (with coverage collection)
|
||||||
pytest -v -n2 \
|
pytest -v -n2 \
|
||||||
--ignore=/workspace/axolotl/tests/e2e/multigpu/solo/ \
|
--ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
|
||||||
--ignore=/workspace/axolotl/tests/e2e/multigpu/patched/ \
|
|
||||||
/workspace/axolotl/tests/e2e/multigpu/ \
|
/workspace/axolotl/tests/e2e/multigpu/ \
|
||||||
--cov=axolotl
|
|
||||||
|
|
||||||
# Run solo tests with coverage append
|
|
||||||
pytest -v --durations=10 -n1 \
|
|
||||||
/workspace/axolotl/tests/e2e/multigpu/solo/ \
|
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append
|
--cov-report=xml:multigpu-coverage.xml
|
||||||
|
|
||||||
pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/patched/ \
|
pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/ \
|
||||||
--cov=axolotl \
|
--cov=axolotl \
|
||||||
--cov-append \
|
--cov-append \
|
||||||
--cov-report=xml:multigpu-coverage.xml
|
--cov-report=xml:multigpu-coverage.xml
|
||||||
|
|
||||||
# Upload coverage to Codecov
|
# Upload coverage to Codecov
|
||||||
codecov upload-process -t $CODECOV_TOKEN -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION}
|
if [ -f multigpu-coverage.xml ]; then
|
||||||
|
codecov -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION}
|
||||||
|
else
|
||||||
|
echo "Coverage file not found. Coverage report may have failed."
|
||||||
|
fi
|
||||||
|
|||||||
@@ -49,6 +49,3 @@ comment:
|
|||||||
require_changes: no
|
require_changes: no
|
||||||
require_base: no
|
require_base: no
|
||||||
require_head: yes
|
require_head: yes
|
||||||
|
|
||||||
github_checks:
|
|
||||||
annotations: false
|
|
||||||
|
|||||||
@@ -37,7 +37,3 @@ RUN git lfs install --skip-repo && \
|
|||||||
pip3 install awscli && \
|
pip3 install awscli && \
|
||||||
# The base image ships with `pydantic==1.8.2` which is not working
|
# The base image ships with `pydantic==1.8.2` which is not working
|
||||||
pip3 install -U --no-cache-dir pydantic==1.10.10
|
pip3 install -U --no-cache-dir pydantic==1.10.10
|
||||||
|
|
||||||
RUN if [ "$PYTORCH_VERSION" = "2.7.0" ] ; then \
|
|
||||||
pip3 install flash-attn==2.7.4.post1; \
|
|
||||||
fi
|
|
||||||
|
|||||||
11
docs/cli.qmd
11
docs/cli.qmd
@@ -199,17 +199,6 @@ output_dir: # Directory to save evaluation results
|
|||||||
|
|
||||||
See [LM Eval Harness](https://github.com/EleutherAI/lm-evaluation-harness) for more details.
|
See [LM Eval Harness](https://github.com/EleutherAI/lm-evaluation-harness) for more details.
|
||||||
|
|
||||||
### delinearize-llama4
|
|
||||||
|
|
||||||
Delinearizes a Llama 4 linearized model into a regular HuggingFace Llama 4 model. This only works with the non-quantized linearized model.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
axolotl delinearize-llama4 --model path/to/model_dir --output path/to/output_dir
|
|
||||||
```
|
|
||||||
|
|
||||||
This would be necessary to use with other frameworks. If you have an adapter, merge it with the non-quantized linearized model before delinearizing.
|
|
||||||
|
|
||||||
|
|
||||||
## Legacy CLI Usage
|
## Legacy CLI Usage
|
||||||
|
|
||||||
While the new Click-based CLI is preferred, Axolotl still supports the legacy module-based CLI:
|
While the new Click-based CLI is preferred, Axolotl still supports the legacy module-based CLI:
|
||||||
|
|||||||
@@ -49,8 +49,7 @@ sections = [
|
|||||||
("Knowledge Distillation (KD)", "kd"),
|
("Knowledge Distillation (KD)", "kd"),
|
||||||
("Liger Kernels", "liger"),
|
("Liger Kernels", "liger"),
|
||||||
("Language Model Evaluation Harness (LM Eval)", "lm_eval"),
|
("Language Model Evaluation Harness (LM Eval)", "lm_eval"),
|
||||||
("Spectrum", "spectrum"),
|
("Spectrum", "spectrum")
|
||||||
("LLMCompressor", "llm_compressor")
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for section_name, folder_name in sections:
|
for section_name, folder_name in sections:
|
||||||
|
|||||||
@@ -19,12 +19,6 @@ This guide covers all the ways you can install and set up Axolotl for your envir
|
|||||||
|
|
||||||
## Installation Methods {#sec-installation-methods}
|
## Installation Methods {#sec-installation-methods}
|
||||||
|
|
||||||
::: {.callout-important}
|
|
||||||
Please make sure to have Pytorch installed before installing Axolotl in your local environment.
|
|
||||||
|
|
||||||
Follow the instructions at: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/)
|
|
||||||
:::
|
|
||||||
|
|
||||||
### PyPI Installation (Recommended) {#sec-pypi}
|
### PyPI Installation (Recommended) {#sec-pypi}
|
||||||
|
|
||||||
```{.bash}
|
```{.bash}
|
||||||
|
|||||||
@@ -1,62 +0,0 @@
|
|||||||
base_model: THUDM/GLM-4-32B-0414
|
|
||||||
# Automatically upload checkpoint and final model to HF
|
|
||||||
# hub_model_id: username/custom_model_name
|
|
||||||
|
|
||||||
load_in_4bit: true
|
|
||||||
|
|
||||||
datasets:
|
|
||||||
- path: teknium/GPT4-LLM-Cleaned
|
|
||||||
type: alpaca
|
|
||||||
dataset_prepared_path: last_run_prepared
|
|
||||||
val_set_size: 0
|
|
||||||
output_dir: ./outputs/qlora-out
|
|
||||||
|
|
||||||
adapter: qlora
|
|
||||||
lora_model_dir:
|
|
||||||
|
|
||||||
sequence_len: 2048
|
|
||||||
sample_packing: true
|
|
||||||
eval_sample_packing: true
|
|
||||||
pad_to_sequence_len: true
|
|
||||||
|
|
||||||
lora_r: 16
|
|
||||||
lora_alpha: 32
|
|
||||||
lora_dropout: 0.05
|
|
||||||
lora_target_modules:
|
|
||||||
- gate_proj
|
|
||||||
- down_proj
|
|
||||||
- up_proj
|
|
||||||
- q_proj
|
|
||||||
- v_proj
|
|
||||||
- k_proj
|
|
||||||
- o_proj
|
|
||||||
|
|
||||||
wandb_project:
|
|
||||||
wandb_entity:
|
|
||||||
wandb_watch:
|
|
||||||
wandb_name:
|
|
||||||
wandb_log_model:
|
|
||||||
|
|
||||||
gradient_accumulation_steps: 2
|
|
||||||
micro_batch_size: 2
|
|
||||||
num_epochs: 1
|
|
||||||
optimizer: adamw_8bit
|
|
||||||
lr_scheduler: cosine
|
|
||||||
learning_rate: 0.0002
|
|
||||||
|
|
||||||
bf16: auto
|
|
||||||
tf32: false
|
|
||||||
|
|
||||||
gradient_checkpointing: true
|
|
||||||
resume_from_checkpoint:
|
|
||||||
logging_steps: 1
|
|
||||||
flash_attention: true
|
|
||||||
|
|
||||||
loss_watchdog_threshold: 5.0
|
|
||||||
loss_watchdog_patience: 3
|
|
||||||
|
|
||||||
warmup_steps: 10
|
|
||||||
evals_per_epoch: 1
|
|
||||||
saves_per_epoch: 1
|
|
||||||
weight_decay: 0.0
|
|
||||||
special_tokens:
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
base_model: neuralmagic/Sparse-Llama-3.1-8B-2of4
|
|
||||||
|
|
||||||
plugins:
|
|
||||||
- axolotl.integrations.llm_compressor.LLMCompressorPlugin
|
|
||||||
|
|
||||||
load_in_8bit: false
|
|
||||||
load_in_4bit: false
|
|
||||||
strict: false
|
|
||||||
|
|
||||||
datasets:
|
|
||||||
- path: tatsu-lab/alpaca
|
|
||||||
type: alpaca
|
|
||||||
dataset_prepared_path: last_run_prepared
|
|
||||||
val_set_size: 0.05
|
|
||||||
output_dir: ./outputs/out
|
|
||||||
|
|
||||||
sequence_len: 4096
|
|
||||||
sample_packing: true
|
|
||||||
pad_to_sequence_len: true
|
|
||||||
eval_sample_packing: false
|
|
||||||
|
|
||||||
wandb_project:
|
|
||||||
wandb_entity:
|
|
||||||
wandb_watch:
|
|
||||||
wandb_name:
|
|
||||||
wandb_log_model:
|
|
||||||
|
|
||||||
gradient_accumulation_steps: 8
|
|
||||||
micro_batch_size: 1
|
|
||||||
num_epochs: 1
|
|
||||||
optimizer: paged_adamw_8bit
|
|
||||||
lr_scheduler: cosine
|
|
||||||
learning_rate: 2e-5
|
|
||||||
|
|
||||||
train_on_inputs: false
|
|
||||||
group_by_length: false
|
|
||||||
bf16: auto
|
|
||||||
fp16:
|
|
||||||
tf32: false
|
|
||||||
|
|
||||||
gradient_checkpointing: true
|
|
||||||
gradient_checkpointing_kwargs:
|
|
||||||
use_reentrant: false
|
|
||||||
early_stopping_patience:
|
|
||||||
resume_from_checkpoint:
|
|
||||||
logging_steps: 1
|
|
||||||
xformers_attention:
|
|
||||||
flash_attention: true
|
|
||||||
|
|
||||||
warmup_steps: 100
|
|
||||||
evals_per_epoch: 2
|
|
||||||
eval_table_size:
|
|
||||||
saves_per_epoch: 1
|
|
||||||
debug:
|
|
||||||
deepspeed:
|
|
||||||
weight_decay: 0.0
|
|
||||||
fsdp:
|
|
||||||
fsdp_config:
|
|
||||||
special_tokens:
|
|
||||||
pad_token: <|end_of_text|>
|
|
||||||
|
|
||||||
llmcompressor:
|
|
||||||
recipe:
|
|
||||||
finetuning_stage:
|
|
||||||
finetuning_modifiers:
|
|
||||||
ConstantPruningModifier:
|
|
||||||
targets: [
|
|
||||||
're:.*q_proj.weight',
|
|
||||||
're:.*k_proj.weight',
|
|
||||||
're:.*v_proj.weight',
|
|
||||||
're:.*o_proj.weight',
|
|
||||||
're:.*gate_proj.weight',
|
|
||||||
're:.*up_proj.weight',
|
|
||||||
're:.*down_proj.weight',
|
|
||||||
]
|
|
||||||
start: 0
|
|
||||||
save_compressed: true
|
|
||||||
@@ -26,11 +26,3 @@ Multi-GPU (4xH100) for Llama 4 Scout uses 62.8GB VRAM/GPU @ 4k contenxt length @
|
|||||||
### Llama 4 Maverick 17Bx128Experts (400B)
|
### Llama 4 Maverick 17Bx128Experts (400B)
|
||||||
|
|
||||||
Coming Soon
|
Coming Soon
|
||||||
|
|
||||||
## Delinearized Llama 4 Models
|
|
||||||
|
|
||||||
We provide a script to delinearize Llama 4 linearized models into regular HuggingFace Llama 4 models.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
axolotl delinearize-llama4 --model path/to/model_dir --output path/to/output_dir
|
|
||||||
```
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
codecov
|
codecov
|
||||||
codecov-cli
|
|
||||||
pytest
|
pytest
|
||||||
pytest-cov
|
pytest-cov
|
||||||
pytest-retry
|
pytest-retry
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ triton>=3.0.0
|
|||||||
mamba-ssm==1.2.0.post1
|
mamba-ssm==1.2.0.post1
|
||||||
xformers>=0.0.23.post1
|
xformers>=0.0.23.post1
|
||||||
autoawq==0.2.7.post3
|
autoawq==0.2.7.post3
|
||||||
liger-kernel==0.5.8
|
liger-kernel==0.5.6
|
||||||
# END section
|
# END section
|
||||||
|
|
||||||
packaging==23.2
|
packaging==23.2
|
||||||
@@ -19,7 +19,6 @@ datasets==3.5.0
|
|||||||
deepspeed>=0.15.4
|
deepspeed>=0.15.4
|
||||||
trl==0.16.1
|
trl==0.16.1
|
||||||
hf_xet==1.0.0
|
hf_xet==1.0.0
|
||||||
hqq==0.2.5
|
|
||||||
|
|
||||||
optimum==1.16.2
|
optimum==1.16.2
|
||||||
hf_transfer
|
hf_transfer
|
||||||
|
|||||||
15
setup.py
15
setup.py
@@ -51,7 +51,7 @@ def parse_requirements(extras_require_map):
|
|||||||
try:
|
try:
|
||||||
torch_version = version("torch")
|
torch_version = version("torch")
|
||||||
except PackageNotFoundError:
|
except PackageNotFoundError:
|
||||||
torch_version = "2.6.0" # default to torch 2.6
|
torch_version = "2.5.1"
|
||||||
_install_requires.append(f"torch=={torch_version}")
|
_install_requires.append(f"torch=={torch_version}")
|
||||||
|
|
||||||
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
|
||||||
@@ -64,15 +64,9 @@ def parse_requirements(extras_require_map):
|
|||||||
else:
|
else:
|
||||||
raise ValueError("Invalid version format")
|
raise ValueError("Invalid version format")
|
||||||
|
|
||||||
if (major, minor) >= (2, 7):
|
if (major, minor) >= (2, 6):
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
# _install_requires.append("xformers==0.0.29.post3") # xformers seems to be hard pinned to 2.6.0
|
_install_requires.append("xformers==0.0.29.post2")
|
||||||
extras_require_map["vllm"] = ["vllm==0.8.3"]
|
|
||||||
elif (major, minor) >= (2, 6):
|
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
|
||||||
_install_requires.append(
|
|
||||||
"xformers==0.0.29.post2"
|
|
||||||
) # vllm needs post2 w torch 2.6
|
|
||||||
extras_require_map["vllm"] = ["vllm==0.8.3"]
|
extras_require_map["vllm"] = ["vllm==0.8.3"]
|
||||||
elif (major, minor) >= (2, 5):
|
elif (major, minor) >= (2, 5):
|
||||||
_install_requires.pop(_install_requires.index(xformers_version))
|
_install_requires.pop(_install_requires.index(xformers_version))
|
||||||
@@ -149,9 +143,6 @@ extras_require = {
|
|||||||
"vllm": [
|
"vllm": [
|
||||||
"vllm==0.7.2",
|
"vllm==0.7.2",
|
||||||
],
|
],
|
||||||
"llmcompressor": [
|
|
||||||
"llmcompressor==0.5.1",
|
|
||||||
],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
install_requires, dependency_links, extras_require_build = parse_requirements(
|
install_requires, dependency_links, extras_require_build = parse_requirements(
|
||||||
|
|||||||
@@ -39,16 +39,16 @@ class TrainerCliArgs:
|
|||||||
class VllmServeCliArgs:
|
class VllmServeCliArgs:
|
||||||
"""Dataclass with CLI arguments for `axolotl vllm-serve` command."""
|
"""Dataclass with CLI arguments for `axolotl vllm-serve` command."""
|
||||||
|
|
||||||
tensor_parallel_size: Optional[int] = field(
|
tensor_parallel_size: int = field(
|
||||||
default=None,
|
default=1,
|
||||||
metadata={"help": "Number of tensor parallel workers to use."},
|
metadata={"help": "Number of tensor parallel workers to use."},
|
||||||
)
|
)
|
||||||
host: Optional[str] = field(
|
host: str = field(
|
||||||
default=None, # nosec B104
|
default="0.0.0.0", # nosec B104
|
||||||
metadata={"help": "Host address to run the server on."},
|
metadata={"help": "Host address to run the server on."},
|
||||||
)
|
)
|
||||||
port: Optional[int] = field(
|
port: int = field(
|
||||||
default=None,
|
default=8000,
|
||||||
metadata={"help": "Port to run the server on."},
|
metadata={"help": "Port to run the server on."},
|
||||||
)
|
)
|
||||||
gpu_memory_utilization: Optional[float] = field(
|
gpu_memory_utilization: Optional[float] = field(
|
||||||
|
|||||||
@@ -1040,11 +1040,9 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
|||||||
if self.cfg.dataset_processes:
|
if self.cfg.dataset_processes:
|
||||||
training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
|
training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
|
||||||
|
|
||||||
if self.cfg.trl and self.cfg.trl.beta is not None:
|
if (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
|
||||||
training_args_kwargs["beta"] = self.cfg.trl.beta
|
training_args_kwargs["beta"] = self.cfg.trl.beta or self.cfg.rl_beta
|
||||||
elif self.cfg.rl_beta is not None:
|
if self.cfg.orpo_alpha:
|
||||||
training_args_kwargs["beta"] = self.cfg.rl_beta
|
|
||||||
elif self.cfg.orpo_alpha is not None:
|
|
||||||
# trl does some odd mapping of alpha to beta to reuse the beta parameter ???
|
# trl does some odd mapping of alpha to beta to reuse the beta parameter ???
|
||||||
training_args_kwargs["beta"] = self.cfg.orpo_alpha
|
training_args_kwargs["beta"] = self.cfg.orpo_alpha
|
||||||
|
|
||||||
|
|||||||
@@ -40,8 +40,8 @@ class GRPOStrategy:
|
|||||||
|
|
||||||
if trl.use_vllm:
|
if trl.use_vllm:
|
||||||
grpo_args_kwargs["use_vllm"] = trl.use_vllm
|
grpo_args_kwargs["use_vllm"] = trl.use_vllm
|
||||||
grpo_args_kwargs["vllm_server_host"] = trl.vllm_server_host or trl.vllm.host
|
grpo_args_kwargs["vllm_server_host"] = trl.vllm_server_host
|
||||||
grpo_args_kwargs["vllm_server_port"] = trl.vllm_server_port or trl.vllm.port
|
grpo_args_kwargs["vllm_server_port"] = trl.vllm_server_port
|
||||||
if trl.vllm_server_timeout:
|
if trl.vllm_server_timeout:
|
||||||
grpo_args_kwargs["vllm_server_timeout"] = trl.vllm_server_timeout
|
grpo_args_kwargs["vllm_server_timeout"] = trl.vllm_server_timeout
|
||||||
if trl.vllm_guided_decoding_regex:
|
if trl.vllm_guided_decoding_regex:
|
||||||
|
|||||||
@@ -47,8 +47,6 @@ cut_cross_entropy: true
|
|||||||
- qwen2
|
- qwen2
|
||||||
- cohere
|
- cohere
|
||||||
- cohere2
|
- cohere2
|
||||||
- glm
|
|
||||||
- glm4
|
|
||||||
|
|
||||||
## Citation
|
## Citation
|
||||||
|
|
||||||
|
|||||||
@@ -1,57 +0,0 @@
|
|||||||
"""GLM 4 patch. GLM family inherits from Llama."""
|
|
||||||
|
|
||||||
from types import MethodType
|
|
||||||
|
|
||||||
import transformers
|
|
||||||
from cut_cross_entropy.transformers.utils import (
|
|
||||||
PatchOptions,
|
|
||||||
TransformersModelT,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def patch_glm(
|
|
||||||
maybe_model: TransformersModelT | str | transformers.PretrainedConfig,
|
|
||||||
patch_options: PatchOptions,
|
|
||||||
) -> TransformersModelT | None:
|
|
||||||
|
|
||||||
# Set the _PATCH_OPTS in the llama patch file
|
|
||||||
import cut_cross_entropy.transformers.llama as llama_patch
|
|
||||||
|
|
||||||
llama_patch._PATCH_OPTS = patch_options # pylint: disable=protected-access
|
|
||||||
|
|
||||||
from cut_cross_entropy.transformers.llama import cce_forward
|
|
||||||
from transformers.models.glm import modeling_glm
|
|
||||||
|
|
||||||
if isinstance(maybe_model, transformers.PreTrainedModel):
|
|
||||||
assert isinstance(
|
|
||||||
maybe_model, modeling_glm.GlmForCausalLM
|
|
||||||
), f"Expected a GlmForCausalLM model. Got {type(maybe_model)}."
|
|
||||||
maybe_model.forward = MethodType(cce_forward, maybe_model)
|
|
||||||
return maybe_model
|
|
||||||
|
|
||||||
modeling_glm.GlmForCausalLM.forward = cce_forward
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def patch_glm4(
|
|
||||||
maybe_model: TransformersModelT | str | transformers.PretrainedConfig,
|
|
||||||
patch_options: PatchOptions,
|
|
||||||
) -> TransformersModelT | None:
|
|
||||||
|
|
||||||
# Set the _PATCH_OPTS in the llama patch file
|
|
||||||
import cut_cross_entropy.transformers.llama as llama_patch
|
|
||||||
|
|
||||||
llama_patch._PATCH_OPTS = patch_options # pylint: disable=protected-access
|
|
||||||
|
|
||||||
from cut_cross_entropy.transformers.llama import cce_forward
|
|
||||||
from transformers.models.glm4 import modeling_glm4
|
|
||||||
|
|
||||||
if isinstance(maybe_model, transformers.PreTrainedModel):
|
|
||||||
assert isinstance(
|
|
||||||
maybe_model, modeling_glm4.Glm4ForCausalLM
|
|
||||||
), f"Expected a Glm4ForCausalLM model. Got {type(maybe_model)}."
|
|
||||||
maybe_model.forward = MethodType(cce_forward, maybe_model)
|
|
||||||
return maybe_model
|
|
||||||
|
|
||||||
modeling_glm4.Glm4ForCausalLM.forward = cce_forward
|
|
||||||
return None
|
|
||||||
@@ -20,10 +20,6 @@ from axolotl.integrations.cut_cross_entropy.monkeypatch.gemma3 import (
|
|||||||
patch_gemma3,
|
patch_gemma3,
|
||||||
patch_gemma3_text,
|
patch_gemma3_text,
|
||||||
)
|
)
|
||||||
from axolotl.integrations.cut_cross_entropy.monkeypatch.glm4 import (
|
|
||||||
patch_glm,
|
|
||||||
patch_glm4,
|
|
||||||
)
|
|
||||||
from axolotl.integrations.cut_cross_entropy.monkeypatch.llama4 import (
|
from axolotl.integrations.cut_cross_entropy.monkeypatch.llama4 import (
|
||||||
patch_llama4,
|
patch_llama4,
|
||||||
patch_llama4_text,
|
patch_llama4_text,
|
||||||
@@ -49,8 +45,6 @@ CUT_CROSS_ENTROPY_MODEL_MAPPING = {
|
|||||||
"qwen2": patch_qwen2,
|
"qwen2": patch_qwen2,
|
||||||
"cohere": patch_cohere,
|
"cohere": patch_cohere,
|
||||||
"cohere2": patch_cohere2,
|
"cohere2": patch_cohere2,
|
||||||
"glm": patch_glm,
|
|
||||||
"glm4": patch_glm4,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ liger_fused_linear_cross_entropy: true
|
|||||||
- deepseek_v2
|
- deepseek_v2
|
||||||
- gemma
|
- gemma
|
||||||
- gemma2
|
- gemma2
|
||||||
- gemma3
|
- gemma3 (partial support, no support for FLCE yet)
|
||||||
- granite
|
- granite
|
||||||
- jamba
|
- jamba
|
||||||
- llama
|
- llama
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ It is designed to be performant, correct, and light-weight.
|
|||||||
import inspect
|
import inspect
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
from axolotl.integrations.base import BasePlugin
|
from axolotl.integrations.base import BasePlugin
|
||||||
|
|
||||||
@@ -54,6 +55,7 @@ class LigerPlugin(BasePlugin):
|
|||||||
)
|
)
|
||||||
from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss
|
from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss
|
||||||
from liger_kernel.transformers.functional import liger_cross_entropy
|
from liger_kernel.transformers.functional import liger_cross_entropy
|
||||||
|
from liger_kernel.transformers.geglu import LigerGEGLUMLP
|
||||||
from liger_kernel.transformers.layer_norm import LigerLayerNorm
|
from liger_kernel.transformers.layer_norm import LigerLayerNorm
|
||||||
from liger_kernel.transformers.monkey_patch import MODEL_TYPE_TO_APPLY_LIGER_FN
|
from liger_kernel.transformers.monkey_patch import MODEL_TYPE_TO_APPLY_LIGER_FN
|
||||||
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
||||||
@@ -139,6 +141,38 @@ class LigerPlugin(BasePlugin):
|
|||||||
modeling_mod.CrossEntropyLoss = LigerCrossEntropyLoss
|
modeling_mod.CrossEntropyLoss = LigerCrossEntropyLoss
|
||||||
if cfg.liger_fused_linear_cross_entropy:
|
if cfg.liger_fused_linear_cross_entropy:
|
||||||
modeling_mod.DeepseekV2ForCausalLM.forward = deepseekv2_lce_forward
|
modeling_mod.DeepseekV2ForCausalLM.forward = deepseekv2_lce_forward
|
||||||
|
elif cfg.model_config_type in ["gemma3", "gemma3_text"]:
|
||||||
|
from transformers.models.gemma3 import modeling_gemma3
|
||||||
|
|
||||||
|
if cfg.liger_rope:
|
||||||
|
modeling_gemma3.apply_rotary_pos_emb = liger_rotary_pos_emb
|
||||||
|
if cfg.liger_rms_norm:
|
||||||
|
|
||||||
|
def _liger_rms_norm_wrapper(dim, **kwargs):
|
||||||
|
"Convert 'dim' keyword to 'hidden_size' to pass to LigerRMSNorm"
|
||||||
|
return LigerRMSNorm(hidden_size=dim, **kwargs)
|
||||||
|
|
||||||
|
modeling_gemma3.Gemma3RMSNorm = partial(
|
||||||
|
_liger_rms_norm_wrapper,
|
||||||
|
offset=1.0,
|
||||||
|
casting_mode="gemma",
|
||||||
|
init_fn="zeros",
|
||||||
|
in_place=False,
|
||||||
|
)
|
||||||
|
if cfg.liger_glu_activation:
|
||||||
|
modeling_gemma3.Gemma3MLP = LigerGEGLUMLP
|
||||||
|
if cfg.liger_layer_norm:
|
||||||
|
modeling_gemma3.nn.LayerNorm = LigerLayerNorm
|
||||||
|
|
||||||
|
if cfg.liger_cross_entropy:
|
||||||
|
from transformers.loss.loss_utils import nn
|
||||||
|
|
||||||
|
nn.functional.cross_entropy = liger_cross_entropy
|
||||||
|
|
||||||
|
if cfg.liger_fused_linear_cross_entropy:
|
||||||
|
raise NotImplementedError(
|
||||||
|
"Fused linear cross entropy is not yet supported for Gemma3."
|
||||||
|
)
|
||||||
elif cfg.model_config_type == "llama4":
|
elif cfg.model_config_type == "llama4":
|
||||||
from axolotl.integrations.liger.models.llama4 import (
|
from axolotl.integrations.liger.models.llama4 import (
|
||||||
apply_liger_kernel_to_llama4,
|
apply_liger_kernel_to_llama4,
|
||||||
|
|||||||
@@ -1,108 +0,0 @@
|
|||||||
# LLMCompressor Integration
|
|
||||||
|
|
||||||
Fine-tune sparsified models in Axolotl using Neural Magic's [LLMCompressor](https://github.com/vllm-project/llm-compressor).
|
|
||||||
|
|
||||||
This integration enables fine-tuning of models sparsified using LLMCompressor within the Axolotl training framework. By combining LLMCompressor's model compression capabilities with Axolotl's distributed training pipelines, users can efficiently fine-tune sparse models at scale.
|
|
||||||
|
|
||||||
It uses Axolotl’s plugin system to hook into the fine-tuning flows while maintaining sparsity throughout training.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Requirements
|
|
||||||
|
|
||||||
- Axolotl with `llmcompressor` extras:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install "axolotl[llmcompressor]"
|
|
||||||
```
|
|
||||||
|
|
||||||
- Requires `llmcompressor >= 0.5.1`
|
|
||||||
|
|
||||||
This will install all necessary dependencies to fine-tune sparsified models using the integration.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
To enable sparse fine-tuning with this integration, include the plugin in your Axolotl config:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
plugins:
|
|
||||||
- axolotl.integrations.llm_compressor.LLMCompressorPlugin
|
|
||||||
|
|
||||||
llmcompressor:
|
|
||||||
recipe:
|
|
||||||
finetuning_stage:
|
|
||||||
finetuning_modifiers:
|
|
||||||
ConstantPruningModifier:
|
|
||||||
targets: [
|
|
||||||
're:.*q_proj.weight',
|
|
||||||
're:.*k_proj.weight',
|
|
||||||
're:.*v_proj.weight',
|
|
||||||
're:.*o_proj.weight',
|
|
||||||
're:.*gate_proj.weight',
|
|
||||||
're:.*up_proj.weight',
|
|
||||||
're:.*down_proj.weight',
|
|
||||||
]
|
|
||||||
start: 0
|
|
||||||
save_compressed: true
|
|
||||||
# ... (other training arguments)
|
|
||||||
```
|
|
||||||
|
|
||||||
This plugin **does not apply pruning or sparsification itself** — it is intended for **fine-tuning models that have already been sparsified**.
|
|
||||||
|
|
||||||
Pre-sparsified checkpoints can be:
|
|
||||||
- Generated using [LLMCompressor](https://github.com/vllm-project/llm-compressor)
|
|
||||||
- Downloaded from [Neural Magic's Hugging Face page](https://huggingface.co/neuralmagic)
|
|
||||||
- Any custom LLM with compatible sparsity patterns that you've created yourself
|
|
||||||
|
|
||||||
To learn more about writing and customizing LLMCompressor recipes, refer to the official documentation:
|
|
||||||
[https://github.com/vllm-project/llm-compressor/blob/main/README.md](https://github.com/vllm-project/llm-compressor/blob/main/README.md)
|
|
||||||
|
|
||||||
### Storage Optimization with save_compressed
|
|
||||||
|
|
||||||
Setting `save_compressed: true` in your configuration enables saving models in a compressed format, which:
|
|
||||||
- Reduces disk space usage by approximately 40%
|
|
||||||
- Maintains compatibility with vLLM for accelerated inference
|
|
||||||
- Maintains compatibility with llmcompressor for further optimization (example: quantization)
|
|
||||||
|
|
||||||
This option is highly recommended when working with sparse models to maximize the benefits of model compression.
|
|
||||||
|
|
||||||
### Example Config
|
|
||||||
|
|
||||||
See [`examples/llama-3/sparse-finetuning.yaml`](examples/llama-3/sparse-finetuning.yaml) for a complete example.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Inference with vLLM
|
|
||||||
|
|
||||||
After fine-tuning your sparse model, you can leverage vLLM for efficient inference.
|
|
||||||
You can also use LLMCompressor to apply additional quantization to your fine-tuned
|
|
||||||
sparse model before inference for even greater performance benefits:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from vllm import LLM, SamplingParams
|
|
||||||
|
|
||||||
prompts = [
|
|
||||||
"Hello, my name is",
|
|
||||||
"The president of the United States is",
|
|
||||||
"The capital of France is",
|
|
||||||
"The future of AI is",
|
|
||||||
]
|
|
||||||
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
|
|
||||||
llm = LLM("path/to/your/sparse/model")
|
|
||||||
outputs = llm.generate(prompts, sampling_params)
|
|
||||||
|
|
||||||
for output in outputs:
|
|
||||||
prompt = output.prompt
|
|
||||||
generated_text = output.outputs[0].text
|
|
||||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
|
||||||
```
|
|
||||||
|
|
||||||
For more details on vLLM's capabilities and advanced configuration options, see the [official vLLM documentation](https://docs.vllm.ai/).
|
|
||||||
|
|
||||||
## Learn More
|
|
||||||
|
|
||||||
For details on available sparsity and quantization schemes, fine-tuning recipes, and usage examples, visit the official LLMCompressor repository:
|
|
||||||
|
|
||||||
[https://github.com/vllm-project/llm-compressor](https://github.com/vllm-project/llm-compressor)
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
"""Integration entry point for the LLMCompressor plugin."""
|
|
||||||
|
|
||||||
from .plugin import LLMCompressorPlugin
|
|
||||||
|
|
||||||
__all__ = ["LLMCompressorPlugin"]
|
|
||||||
@@ -1,40 +0,0 @@
|
|||||||
"""
|
|
||||||
LLMCompressor and Sparse Finetuning config models.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing_extensions import Annotated
|
|
||||||
|
|
||||||
|
|
||||||
class CompressionArgs(BaseModel):
    """Sparse Finetuning config for LLMCompressor.

    Groups the two settings that live under the ``llmcompressor`` config key:
    the compression recipe and the flag selecting compressed saving.
    """

    # Typing for recipe is set to Any due to:
    # https://github.com/vllm-project/llm-compressor/issues/1319
    # No default is declared for this field.
    recipe: Annotated[
        Any,
        Field(
            description="The recipe containing the compression algorithms and hyperparameters to apply."
        ),
    ]

    # Declared with default=False.
    save_compressed: Annotated[
        bool,
        Field(
            default=False,
            description="Whether to save the compressed model after training.",
        ),
    ]
|
|
||||||
|
|
||||||
|
|
||||||
class LLMCompressorArgs(BaseModel):
    """LLMCompressor configuration BaseModel.

    Exposes a single ``llmcompressor`` field whose value is a
    ``CompressionArgs`` instance.
    """

    # No default is declared for this field.
    llmcompressor: Annotated[
        CompressionArgs,
        Field(
            description="Arguments enabling compression pathways through the LLM Compressor plugins"
        ),
    ]
|
|
||||||
@@ -1,171 +0,0 @@
|
|||||||
"""
|
|
||||||
Sparse Finetuning plugin for Axolotl — enables handling of sparse neural networks
|
|
||||||
by maintaining masks for zero weights during training.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from functools import wraps
|
|
||||||
from typing import Any, Callable, Concatenate, ParamSpec, TypeVar
|
|
||||||
|
|
||||||
from llmcompressor import active_session, create_session
|
|
||||||
from llmcompressor.core import callbacks as session_callbacks
|
|
||||||
from llmcompressor.recipe import Recipe
|
|
||||||
from torch.nn import Module
|
|
||||||
from transformers.trainer import Trainer
|
|
||||||
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
|
|
||||||
from transformers.training_args import TrainingArguments
|
|
||||||
|
|
||||||
from axolotl.integrations.base import BasePlugin
|
|
||||||
|
|
||||||
P = ParamSpec("P") # Params for generic function signatures
|
|
||||||
R = TypeVar("R") # Return type for generic function signatures
|
|
||||||
|
|
||||||
LOG = logging.getLogger("axolotl.integrations.llm_compressor")
|
|
||||||
|
|
||||||
|
|
||||||
class LLMCompressorCallbackHandler(TrainerCallback):
    """
    Trainer callback for Sparse Finetuning.

    Bridges the Hugging Face training loop and an LLMCompressor session: a
    session is created when the handler is constructed, initialized with the
    recipe when training begins, notified at the start and end of every step,
    and finalized when training ends. While the handler is active the trainer's
    ``compute_loss`` is wrapped so the session is also notified of each loss.
    """

    def __init__(self, trainer: Trainer, recipe: Any):
        """
        Initialize the Sparse Finetuning callback handler.

        Args:
            trainer (Trainer): Huggingface Trainer instance.
            recipe (Recipe | dict): Sparse finetuning recipe to apply. Anything
                that is not already a ``Recipe`` is passed through
                ``Recipe.model_validate``.
        """
        super().__init__()
        self.trainer = trainer
        self.recipe = (
            Recipe.model_validate(recipe) if not isinstance(recipe, Recipe) else recipe
        )
        # Remember the unwrapped method so on_train_end can put it back.
        self.original_compute_loss = trainer.compute_loss
        self.trainer.compute_loss = compute_loss_wrapper(self.trainer.compute_loss)
        create_session()

    def on_train_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the beginning of training. Initializes the compression session.

        Args:
            args (TrainingArguments): Training arguments.
            state (TrainerState): Trainer state.
            control (TrainerControl): Trainer control.
        """
        super().on_train_begin(args, state, control, **kwargs)
        # Session initialization is bracketed by barriers on the accelerator.
        self.trainer.accelerator.wait_for_everyone()
        active_session().initialize(
            model=self.trainer.model,
            optimizer=self.trainer.optimizer,
            start=state.epoch,
            recipe=self.recipe,
        )
        self.trainer.accelerator.wait_for_everyone()

    def on_step_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the beginning of a training step. Triggers batch_start callback.
        """
        super().on_step_begin(args, state, control, **kwargs)
        session_callbacks.batch_start()

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the end of a training step. Triggers optimizer and batch_end callbacks.
        """
        super().on_step_end(args, state, control, **kwargs)
        # Both optimizer notifications are issued here, back to back, followed
        # by the end-of-batch notification.
        session_callbacks.optim_pre_step()
        session_callbacks.optim_post_step()
        session_callbacks.batch_end()

    def on_train_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the end of training. Finalizes the compression session and
        restores the trainer's original ``compute_loss``.
        """
        super().on_train_end(args, state, control, **kwargs)
        active_session().finalize()
        # Restore the same attribute that __init__ replaced. Assigning to
        # ``compute_loss_func`` instead would leave the wrapper installed and
        # overwrite an unrelated trainer attribute.
        self.trainer.compute_loss = self.original_compute_loss
|
|
||||||
|
|
||||||
|
|
||||||
class LLMCompressorPlugin(BasePlugin):
    """
    Axolotl plugin that attaches Sparse Finetuning to a trainer.
    """

    def get_input_args(self) -> str:
        """
        Point Axolotl at this plugin's argument model.

        Returns:
            str: Dotted path to the LLMCompressorArgs class.
        """
        args_class_path = "axolotl.integrations.llm_compressor.args.LLMCompressorArgs"
        return args_class_path

    def add_callbacks_post_trainer(self, cfg: Any, trainer: Trainer) -> list:
        """
        Build the Sparse Finetuning callback for the given trainer.

        Args:
            cfg (Any): Configuration object; ``cfg.llmcompressor.recipe`` is read.
            trainer (Trainer): Huggingface Trainer instance.

        Returns:
            list: Single-element list holding the configured callback.
        """
        LOG.info("Adding Sparse Finetuning callback to the trainer")
        sparse_recipe = cfg.llmcompressor.recipe
        handler = LLMCompressorCallbackHandler(trainer=trainer, recipe=sparse_recipe)
        return [handler]
|
|
||||||
|
|
||||||
|
|
||||||
def compute_loss_wrapper(
    compute_loss_func: Callable[Concatenate[Module, P], R],
) -> Callable[Concatenate[Module, P], R]:
    """
    Decorate a loss function so the active session hears about each loss.

    Args:
        compute_loss_func (Callable): Original loss computation function; its
            first positional argument is the model.

    Returns:
        Callable: Function with the same signature that returns the original
        result unchanged and additionally fires the ``loss_calculated``
        callback when the session is initialized and the model is in
        training mode.
    """

    @wraps(compute_loss_func)
    def notifying_compute_loss(
        model: Module, *args: P.args, **kwargs: P.kwargs
    ) -> R:
        result = compute_loss_func(model, *args, **kwargs)
        session_ready = active_session().lifecycle.initialized_
        # ``model.training`` is only consulted once the session is ready.
        if session_ready and model.training:
            session_callbacks.loss_calculated(loss=result)
        return result

    return notifying_compute_loss
|
|
||||||
@@ -1,40 +0,0 @@
|
|||||||
"""Utilities for llmcompressor integration with axolotl."""
|
|
||||||
|
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
|
|
||||||
modify_save_pretrained,
|
|
||||||
)
|
|
||||||
from transformers import PreTrainedModel, Trainer
|
|
||||||
|
|
||||||
|
|
||||||
def save_compressed_model(
    model: PreTrainedModel,
    output_dir: Union[str, bytes],
    trainer: Trainer,
    safe_serialization: bool = False,
    save_compressed: bool = False,
) -> None:
    """
    Wait for all processes, then save the model from the main process.

    Args:
        model (PreTrainedModel): The model to be saved.
        output_dir (str or bytes): Path where the model files will be written.
        trainer (Trainer): Hugging Face Trainer whose accelerator provides the
            barrier and the main-process check.
        safe_serialization (bool): Forwarded to ``save_pretrained``.
        save_compressed (bool): Forwarded to ``save_pretrained``; sparsity
            compression stats are skipped when this is False.
    """
    accelerator = trainer.accelerator
    accelerator.wait_for_everyone()

    if accelerator.is_main_process:
        # Only the main process patches save_pretrained and writes the files.
        modify_save_pretrained(model)
        save_options = {
            "safe_serialization": safe_serialization,
            "save_compressed": save_compressed,
            "skip_sparsity_compression_stats": not save_compressed,
        }
        model.save_pretrained(output_dir, **save_options)
|
|
||||||
@@ -31,8 +31,6 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
|
|||||||
"starcoder2",
|
"starcoder2",
|
||||||
"deepseek_v2",
|
"deepseek_v2",
|
||||||
"deepseek_v3",
|
"deepseek_v3",
|
||||||
"glm",
|
|
||||||
"glm4",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -272,7 +272,7 @@ class ReLoRAScheduler(LRScheduler):
|
|||||||
self.warmup_steps = warmup_steps
|
self.warmup_steps = warmup_steps
|
||||||
self.anneal_steps = anneal_steps
|
self.anneal_steps = anneal_steps
|
||||||
self.min_lr_scale = min_lr_scale
|
self.min_lr_scale = min_lr_scale
|
||||||
super().__init__(optimizer, inner_schedule.last_epoch)
|
super().__init__(optimizer, inner_schedule.last_epoch, inner_schedule.verbose)
|
||||||
|
|
||||||
def get_lr(self) -> float:
|
def get_lr(self) -> float:
|
||||||
self.inner_schedule.last_epoch = self.last_epoch
|
self.inner_schedule.last_epoch = self.last_epoch
|
||||||
|
|||||||
@@ -271,19 +271,6 @@ def save_trained_model(
|
|||||||
os.remove(os.path.join(cfg.output_dir, "model.safetensors"))
|
os.remove(os.path.join(cfg.output_dir, "model.safetensors"))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
elif hasattr(cfg, "llmcompressor") and cfg.llmcompressor:
|
|
||||||
from axolotl.integrations.llm_compressor.utils import (
|
|
||||||
save_compressed_model,
|
|
||||||
)
|
|
||||||
|
|
||||||
save_compressed_model(
|
|
||||||
model=model,
|
|
||||||
output_dir=cfg.output_dir,
|
|
||||||
trainer=trainer,
|
|
||||||
safe_serialization=safe_serialization,
|
|
||||||
save_compressed=cfg.llmcompressor.save_compressed,
|
|
||||||
)
|
|
||||||
|
|
||||||
elif cfg.local_rank == 0:
|
elif cfg.local_rank == 0:
|
||||||
if cfg.flash_optimum and BetterTransformer:
|
if cfg.flash_optimum and BetterTransformer:
|
||||||
model = BetterTransformer.reverse(model)
|
model = BetterTransformer.reverse(model)
|
||||||
@@ -292,7 +279,6 @@ def save_trained_model(
|
|||||||
trainer.model.save_pretrained(
|
trainer.model.save_pretrained(
|
||||||
cfg.output_dir, safe_serialization=safe_serialization
|
cfg.output_dir, safe_serialization=safe_serialization
|
||||||
)
|
)
|
||||||
|
|
||||||
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
import functools
|
import functools
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
|
|
||||||
@@ -118,27 +117,9 @@ def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None):
|
|||||||
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
||||||
)
|
)
|
||||||
|
|
||||||
# when letting accelerator dispatch batches from the main process, we don't need to load the dataset from
|
iter_ds = load_dataset(
|
||||||
# other ranks, we just need to present a fake dataset
|
path, streaming=True, split=split, name=name, data_files=data_files
|
||||||
if (
|
)
|
||||||
cfg.accelerator_config
|
|
||||||
and cfg.accelerator_config.dispatch_batches
|
|
||||||
and not is_local_main_process()
|
|
||||||
):
|
|
||||||
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
|
|
||||||
f.write("text\n")
|
|
||||||
f.write("lorem ipsum dolor sit amet\n")
|
|
||||||
# rewind the file pointer to the beginning so we can read it again
|
|
||||||
f.seek(0)
|
|
||||||
iter_ds = load_dataset(
|
|
||||||
"csv", data_files=f.name, split="train", streaming=True
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if is_local_main_process():
|
|
||||||
iter_ds = load_dataset(
|
|
||||||
path, streaming=True, split=split, name=name, data_files=data_files
|
|
||||||
)
|
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
LOG.info(f"Skipping {skip} samples from the dataset")
|
LOG.info(f"Skipping {skip} samples from the dataset")
|
||||||
iter_ds = iter_ds.skip(skip)
|
iter_ds = iter_ds.skip(skip)
|
||||||
|
|||||||
@@ -139,22 +139,6 @@ def check_model_config(cfg: DictDefault, model_config: PretrainedConfig):
|
|||||||
hasattr(model_config, "quantization_config")
|
hasattr(model_config, "quantization_config")
|
||||||
and model_config.quantization_config
|
and model_config.quantization_config
|
||||||
)
|
)
|
||||||
|
|
||||||
# Detect compressed-tensors config
|
|
||||||
is_compressed_tensors_config = (
|
|
||||||
quant_config_exists
|
|
||||||
and model_config.quantization_config.get("quant_method") == "compressed-tensors"
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_compressed_tensors_config:
|
|
||||||
if model_config.quantization_config.get("config_groups"):
|
|
||||||
LOG.warning(
|
|
||||||
"Found `config_groups` in a compressed-tensors config. "
|
|
||||||
"QAT integration with llmcompressor is not tested."
|
|
||||||
)
|
|
||||||
# Skip further quant checks for compressed-tensors
|
|
||||||
return
|
|
||||||
|
|
||||||
quant_config_method_is_gptq = (
|
quant_config_method_is_gptq = (
|
||||||
quant_config_exists
|
quant_config_exists
|
||||||
and "quant_method" in model_config.quantization_config
|
and "quant_method" in model_config.quantization_config
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class RexLR(LRScheduler):
|
|||||||
self.max_lr = max_lr
|
self.max_lr = max_lr
|
||||||
self.total_steps = total_steps
|
self.total_steps = total_steps
|
||||||
self.num_warmup_steps = num_warmup_steps
|
self.num_warmup_steps = num_warmup_steps
|
||||||
self.last_step = max(last_step - 1, 0)
|
self.last_step = last_step - 1
|
||||||
|
|
||||||
# Ensure each parameter group has an "initial_lr" key to avoid issues when resuming.
|
# Ensure each parameter group has an "initial_lr" key to avoid issues when resuming.
|
||||||
for group in optimizer.param_groups:
|
for group in optimizer.param_groups:
|
||||||
|
|||||||
@@ -660,7 +660,6 @@ class AxolotlInputConfig(
|
|||||||
data.get("val_set_size") == 0
|
data.get("val_set_size") == 0
|
||||||
and (data.get("eval_steps") or data.get("eval_strategy"))
|
and (data.get("eval_steps") or data.get("eval_strategy"))
|
||||||
and not data.get("test_datasets")
|
and not data.get("test_datasets")
|
||||||
and data.get("eval_strategy") != "no"
|
|
||||||
):
|
):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"eval_steps and eval_strategy are not supported with val_set_size == 0"
|
"eval_steps and eval_strategy are not supported with val_set_size == 0"
|
||||||
|
|||||||
@@ -36,11 +36,3 @@ class VllmConfig(BaseModel):
|
|||||||
default=None,
|
default=None,
|
||||||
json_schema_extra={"description": "Enable prefix caching for VLLM"},
|
json_schema_extra={"description": "Enable prefix caching for VLLM"},
|
||||||
)
|
)
|
||||||
host: str | None = Field(
|
|
||||||
default="0.0.0.0", # nosec B104
|
|
||||||
json_schema_extra={"description": "Host for the vLLM server to start on"},
|
|
||||||
)
|
|
||||||
port: int | None = Field(
|
|
||||||
default=8000,
|
|
||||||
json_schema_extra={"description": "Port of the vLLM server to start on"},
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -193,14 +193,6 @@ def download_tiny_shakespeare_dataset():
|
|||||||
snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")
|
snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
def download_evolkit_kd_sample_dataset():
    """Session-scoped, autouse fixture that downloads the evolkit KD sample dataset."""
    dataset_repo = "axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample"
    snapshot_download_w_retry(dataset_repo, repo_type="dataset")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_deepseek_model_fixture():
|
def download_deepseek_model_fixture():
|
||||||
snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
|
snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
|
||||||
@@ -216,16 +208,6 @@ def download_huggyllama_model_fixture():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
def download_llama33_70b_model_fixture():
    """Session-scoped, autouse fixture that downloads the Llama 3.3 70B tokenizer repo."""
    # Only files matching these patterns are requested (tokenizer only).
    wanted_files = ["*token*", "config.json"]
    tokenizer_repo = "axolotl-ai-co/Llama-3.3-70B-Instruct-tokenizer"
    snapshot_download_w_retry(
        tokenizer_repo, repo_type="model", allow_patterns=wanted_files
    )
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
|
@pytest.fixture(scope="session", autouse=True)
|
||||||
def download_llama_1b_model_fixture():
|
def download_llama_1b_model_fixture():
|
||||||
# download the tokenizer only
|
# download the tokenizer only
|
||||||
@@ -333,14 +315,6 @@ def download_llama2_model_fixture():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session", autouse=True)
def download_llama32_1b_model_fixture():
    """Session-scoped, autouse fixture that downloads the Llama 3.2 1B model repo."""
    model_repo = "osllmai-community/Llama-3.2-1B"
    snapshot_download_w_retry(model_repo, repo_type="model")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@enable_hf_offline
|
@enable_hf_offline
|
||||||
def tokenizer_huggyllama(
|
def tokenizer_huggyllama(
|
||||||
@@ -522,6 +496,12 @@ def dataset_fozziethebeat_alpaca_messages_2k_dpo_test_rev_ea82cff(
|
|||||||
return datasets.load_from_disk(ds_path)["train"]
|
return datasets.load_from_disk(ds_path)["train"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session", autouse=True)
def download_tiny_llama_7m_model():
    """Session-scoped, autouse fixture that downloads the llama-7m model.

    Returns whatever ``snapshot_download_w_retry`` returns for the repo.
    """
    model_repo = "axolotl-ai-internal/llama-7m"
    snapshot = snapshot_download_w_retry(model_repo, repo_type="model")
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
# # pylint: disable=redefined-outer-name,unused-argument
|
# # pylint: disable=redefined-outer-name,unused-argument
|
||||||
# def test_load_fixtures(
|
# def test_load_fixtures(
|
||||||
# download_smollm2_135m_model,
|
# download_smollm2_135m_model,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from axolotl.cli.args import TrainerCliArgs
|
|||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils import get_pytorch_version
|
from axolotl.utils import get_pytorch_version
|
||||||
from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
|
from axolotl.utils.config import normalize_config, prepare_plugins
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists
|
from ..utils import check_model_output_exists
|
||||||
@@ -56,7 +56,6 @@ class TestCutCrossEntropyIntegration:
|
|||||||
# pylint: disable=redefined-outer-name
|
# pylint: disable=redefined-outer-name
|
||||||
def test_llama_w_cce(self, min_cfg, temp_dir):
|
def test_llama_w_cce(self, min_cfg, temp_dir):
|
||||||
cfg = DictDefault(min_cfg)
|
cfg = DictDefault(min_cfg)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
prepare_plugins(cfg)
|
prepare_plugins(cfg)
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
@@ -102,7 +101,6 @@ class TestCutCrossEntropyIntegration:
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
prepare_plugins(cfg)
|
prepare_plugins(cfg)
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
@@ -131,7 +129,6 @@ class TestCutCrossEntropyIntegration:
|
|||||||
attention_type: True,
|
attention_type: True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
prepare_plugins(cfg)
|
prepare_plugins(cfg)
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ class TestKnowledgeDistillation:
|
|||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
assert (Path(temp_dir) / "model.safetensors").exists()
|
assert (Path(temp_dir) / "model.safetensors").exists()
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -121,5 +121,5 @@ class TestKnowledgeDistillation:
|
|||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
assert (Path(temp_dir) / "adapter_model.safetensors").exists()
|
assert (Path(temp_dir) / "adapter_model.safetensors").exists()
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/loss", 1.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/loss", 1.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Simple end-to-end test for Liger integration
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
|
from axolotl.utils.config import normalize_config, prepare_plugins
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
|
from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
|
||||||
@@ -54,7 +54,6 @@ class LigerIntegrationTestCase:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = validate_config(cfg)
|
|
||||||
prepare_plugins(cfg)
|
prepare_plugins(cfg)
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
@@ -101,7 +100,6 @@ class LigerIntegrationTestCase:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = validate_config(cfg)
|
|
||||||
prepare_plugins(cfg)
|
prepare_plugins(cfg)
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
|
|||||||
@@ -1,104 +0,0 @@
|
|||||||
"""
|
|
||||||
E2E smoke tests for LLMCompressorPlugin integration
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from axolotl.cli.args import TrainerCliArgs
|
|
||||||
from axolotl.common.datasets import load_datasets
|
|
||||||
from axolotl.train import train
|
|
||||||
from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
|
|
||||||
from axolotl.utils.dict import DictDefault
|
|
||||||
|
|
||||||
from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
|
|
||||||
|
|
||||||
MODELS = [
|
|
||||||
"nm-testing/llama2.c-stories42M-pruned2.4-compressed",
|
|
||||||
"nm-testing/llama2.c-stories42M-gsm8k-sparse-only-compressed",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
    "base_model", MODELS, ids=["no-checkpoint-recipe", "with-checkpoint-recipe"]
)
@pytest.mark.parametrize(
    "save_compressed", [True, False], ids=["save_compressed", "save_uncompressed"]
)
class TestLLMCompressorIntegration:
    """
    End-to-end smoke tests for axolotl.integrations.llm_compressor.LLMCompressorPlugin
    """

    @require_torch_2_4_1
    def test_llmcompressor_plugin(
        self, temp_dir, base_model: str, save_compressed: bool
    ):
        """Train for a handful of steps with the plugin enabled, then check outputs."""
        # Weight tensors kept under the constant-pruning modifier.
        pruning_targets = [
            "re:.*q_proj.weight",
            "re:.*k_proj.weight",
            "re:.*v_proj.weight",
            "re:.*o_proj.weight",
            "re:.*gate_proj.weight",
            "re:.*up_proj.weight",
            "re:.*down_proj.weight",
        ]
        recipe = {
            "finetuning_stage": {
                "finetuning_modifiers": {
                    "ConstantPruningModifier": {
                        "targets": pruning_targets,
                        "start": 0,
                    },
                },
            },
        }
        plugin_settings = {
            "recipe": recipe,
            "save_compressed": save_compressed,
        }

        # core cfg
        raw_cfg = {
            "base_model": base_model,
            "plugins": ["axolotl.integrations.llm_compressor.LLMCompressorPlugin"],
            "sequence_len": 1024,
            "val_set_size": 0.05,
            "special_tokens": {"pad_token": "<|endoftext|>"},
            "datasets": [{"path": "mhenrichsen/alpaca_2k_test", "type": "alpaca"}],
            "num_epochs": 1,
            "micro_batch_size": 2,
            "gradient_accumulation_steps": 2,
            "output_dir": temp_dir,
            "learning_rate": 1e-5,
            "optimizer": "adamw_torch_fused",
            "lr_scheduler": "cosine",
            "save_safetensors": True,
            "bf16": "auto",
            "max_steps": 5,
            "llmcompressor": plugin_settings,
        }
        cfg = DictDefault(raw_cfg)

        # NOTE(review): plugins are prepared before validation here, presumably
        # so the plugin's args are known to the validator -- confirm.
        prepare_plugins(cfg)
        cfg = validate_config(cfg)
        normalize_config(cfg)

        dataset_meta = load_datasets(cfg=cfg, cli_args=TrainerCliArgs())
        train(cfg=cfg, dataset_meta=dataset_meta)

        check_model_output_exists(temp_dir, cfg)
        _check_llmcompressor_model_outputs(temp_dir, save_compressed)
|
|
||||||
|
|
||||||
|
|
||||||
def _check_llmcompressor_model_outputs(temp_dir, save_compressed):
|
|
||||||
|
|
||||||
# recipe.yaml should exist
|
|
||||||
assert (Path(temp_dir) / "recipe.yaml").exists()
|
|
||||||
|
|
||||||
# sparsity config exists if save_compressed
|
|
||||||
if save_compressed:
|
|
||||||
from compressed_tensors import ModelCompressor
|
|
||||||
from compressed_tensors.config import Sparse24BitMaskConfig
|
|
||||||
|
|
||||||
compressor = ModelCompressor.from_pretrained(temp_dir)
|
|
||||||
assert compressor is not None
|
|
||||||
assert isinstance(compressor.sparsity_config, Sparse24BitMaskConfig)
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
# Tests under this directory should get run "solo" on their own as they
|
|
||||||
# seem to cause issues when run in the same batch as other tests.
|
|
||||||
|
|||||||
@@ -49,9 +49,8 @@ class TestPackedFlex:
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "tatsu-lab/alpaca",
|
"path": "vicgalle/alpaca-gpt4",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
"split": "train[:10%]",
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
@@ -90,5 +89,5 @@ class TestPackedFlex:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -96,5 +96,5 @@ class TestMultiGPUGemma3:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 1.8, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 1.8, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"adapter": "lora",
|
"adapter": "lora",
|
||||||
"lora_r": 8,
|
"lora_r": 8,
|
||||||
@@ -94,7 +94,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -105,7 +105,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -159,14 +159,14 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_dpo_lora_ddp(self, temp_dir):
|
def test_dpo_lora_ddp(self, temp_dir):
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": False,
|
"sample_packing": False,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -244,7 +244,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": False,
|
"sample_packing": False,
|
||||||
"eval_sample_packing": False,
|
"eval_sample_packing": False,
|
||||||
@@ -326,7 +326,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"val_set_size": 0.01,
|
"val_set_size": 0.01,
|
||||||
"special_tokens": {
|
"special_tokens": {
|
||||||
@@ -385,7 +385,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -396,7 +396,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
@@ -457,7 +457,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@require_torch_2_6_0
|
@require_torch_2_6_0
|
||||||
@@ -475,7 +475,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
@@ -538,7 +538,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.1, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.1, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_fsdp_qlora_prequant_packed(self, temp_dir):
|
def test_fsdp_qlora_prequant_packed(self, temp_dir):
|
||||||
@@ -618,7 +618,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -654,7 +654,7 @@ class TestMultiGPULlama:
|
|||||||
adapter = {}
|
adapter = {}
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
@@ -702,7 +702,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -728,7 +728,7 @@ class TestMultiGPULlama:
|
|||||||
adapter = {}
|
adapter = {}
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
@@ -776,7 +776,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -802,7 +802,7 @@ class TestMultiGPULlama:
|
|||||||
adapter = {}
|
adapter = {}
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"sequence_len": 1024,
|
"sequence_len": 1024,
|
||||||
@@ -850,7 +850,7 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.skip(
|
@pytest.mark.skip(
|
||||||
@@ -860,7 +860,7 @@ class TestMultiGPULlama:
|
|||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
cfg = DictDefault(
|
cfg = DictDefault(
|
||||||
{
|
{
|
||||||
"base_model": "HuggingFaceTB/SmolLM2-135M",
|
"base_model": "axolotl-ai-internal/llama-7m",
|
||||||
"fix_untrained_tokens": True,
|
"fix_untrained_tokens": True,
|
||||||
"sequence_len": 512,
|
"sequence_len": 512,
|
||||||
"val_set_size": 0.0,
|
"val_set_size": 0.0,
|
||||||
@@ -917,5 +917,5 @@ class TestMultiGPULlama:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 4.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 4.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ class TestMultiGPURay:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@require_torch_lt_2_6_0
|
@require_torch_lt_2_6_0
|
||||||
@@ -138,5 +138,5 @@ class TestMultiGPURay:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.3, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.3, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from transformers.testing_utils import get_torch_dist_unique_port
|
|||||||
|
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ...utils import check_tensorboard
|
from ..utils import check_tensorboard
|
||||||
|
|
||||||
os.environ["WANDB_DISABLED"] = "true"
|
os.environ["WANDB_DISABLED"] = "true"
|
||||||
|
|
||||||
@@ -93,7 +93,7 @@ class TestSequenceParallelism:
|
|||||||
)
|
)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.6, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.6, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -60,7 +60,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
|||||||
"fp16": True,
|
"fp16": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -105,7 +104,6 @@ class Test4dMultipackLlama(unittest.TestCase):
|
|||||||
"fp16": True,
|
"fp16": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -86,5 +86,5 @@ class TestFAXentropyLlama:
|
|||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 1.5, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 1.5, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -63,7 +63,6 @@ class TestFalconPatched(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -104,7 +103,6 @@ class TestFalconPatched(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from transformers.utils import is_torch_bf16_gpu_available
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -67,7 +67,6 @@ class TestFusedLlama(unittest.TestCase):
|
|||||||
cfg.bf16 = True
|
cfg.bf16 = True
|
||||||
else:
|
else:
|
||||||
cfg.fp16 = True
|
cfg.fp16 = True
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import pytest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -65,7 +65,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -106,7 +105,6 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_availab
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -70,7 +70,6 @@ class TestLoraLlama(unittest.TestCase):
|
|||||||
else:
|
else:
|
||||||
cfg.fp16 = True
|
cfg.fp16 = True
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -121,7 +120,6 @@ class TestLoraLlama(unittest.TestCase):
|
|||||||
"lr_scheduler": "cosine",
|
"lr_scheduler": "cosine",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -63,7 +63,6 @@ class TestMistral(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -105,7 +104,6 @@ class TestMistral(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -60,7 +60,6 @@ class TestMixtral(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import unittest
|
|||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
|
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
from axolotl.utils.models import load_model, load_tokenizer
|
from axolotl.utils.models import load_model, load_tokenizer
|
||||||
|
|
||||||
@@ -47,7 +47,6 @@ class TestModelPatches(unittest.TestCase):
|
|||||||
"eval_steps": 10,
|
"eval_steps": 10,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
tokenizer = load_tokenizer(cfg)
|
tokenizer = load_tokenizer(cfg)
|
||||||
load_model(cfg, tokenizer, inference=False)
|
load_model(cfg, tokenizer, inference=False)
|
||||||
@@ -80,7 +79,6 @@ class TestModelPatches(unittest.TestCase):
|
|||||||
"eval_steps": 10,
|
"eval_steps": 10,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
tokenizer = load_tokenizer(cfg)
|
tokenizer = load_tokenizer(cfg)
|
||||||
load_model(cfg, tokenizer, inference=False)
|
load_model(cfg, tokenizer, inference=False)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, with_temp_dir
|
from ..utils import check_model_output_exists, with_temp_dir
|
||||||
@@ -63,7 +63,6 @@ class TestPhiMultipack(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -83,7 +82,7 @@ class TestPhiMultipack(unittest.TestCase):
|
|||||||
"sample_packing": True,
|
"sample_packing": True,
|
||||||
"flash_attention": True,
|
"flash_attention": True,
|
||||||
"pad_to_sequence_len": True,
|
"pad_to_sequence_len": True,
|
||||||
"load_in_4bit": True,
|
"load_in_8bit": False,
|
||||||
"adapter": "qlora",
|
"adapter": "qlora",
|
||||||
"lora_r": 64,
|
"lora_r": 64,
|
||||||
"lora_alpha": 32,
|
"lora_alpha": 32,
|
||||||
@@ -115,7 +114,6 @@ class TestPhiMultipack(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from transformers.utils import is_torch_bf16_gpu_available
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, most_recent_subdir
|
from ..utils import check_model_output_exists, most_recent_subdir
|
||||||
@@ -46,9 +46,8 @@ class TestResumeLlama:
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "tatsu-lab/alpaca",
|
"path": "vicgalle/alpaca-gpt4",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
"split": "train[:10%]",
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 2,
|
"num_epochs": 2,
|
||||||
@@ -68,7 +67,6 @@ class TestResumeLlama:
|
|||||||
cfg.bf16 = True
|
cfg.bf16 = True
|
||||||
else:
|
else:
|
||||||
cfg.fp16 = True
|
cfg.fp16 = True
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import pytest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from ..utils import check_model_output_exists, check_tensorboard
|
from ..utils import check_model_output_exists, check_tensorboard
|
||||||
@@ -72,7 +72,6 @@ class TestUnslothQLoRA:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -81,7 +80,7 @@ class TestUnslothQLoRA:
|
|||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_unsloth_llama_qlora_unpacked(self, temp_dir):
|
def test_unsloth_llama_qlora_unpacked(self, temp_dir):
|
||||||
@@ -123,7 +122,6 @@ class TestUnslothQLoRA:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -132,7 +130,7 @@ class TestUnslothQLoRA:
|
|||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -179,7 +177,6 @@ class TestUnslothQLoRA:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
@@ -188,5 +185,5 @@ class TestUnslothQLoRA:
|
|||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -41,9 +41,8 @@ class TestPackedFlex(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "tatsu-lab/alpaca",
|
"path": "vicgalle/alpaca-gpt4",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
"split": "train[:10%]",
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
@@ -70,5 +69,5 @@ class TestPackedFlex(unittest.TestCase):
|
|||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -102,7 +102,6 @@ class TestEmbeddingsLrScale(unittest.TestCase):
|
|||||||
"use_tensorboard": True,
|
"use_tensorboard": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -84,5 +84,5 @@ class TestPretrainLlama:
|
|||||||
temp_dir + "/runs",
|
temp_dir + "/runs",
|
||||||
"train/train_loss",
|
"train/train_loss",
|
||||||
loss_threshold,
|
loss_threshold,
|
||||||
"Train Loss is too high",
|
"Train Loss (%s) is too high",
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -109,7 +109,6 @@ class TestLlamaVision(unittest.TestCase):
|
|||||||
"bf16": True,
|
"bf16": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -40,9 +40,8 @@ class TestPackedLlama(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
"datasets": [
|
"datasets": [
|
||||||
{
|
{
|
||||||
"path": "tatsu-lab/alpaca",
|
"path": "vicgalle/alpaca-gpt4",
|
||||||
"type": "alpaca",
|
"type": "alpaca",
|
||||||
"split": "train[:10%]",
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
@@ -69,5 +68,5 @@ class TestPackedLlama(unittest.TestCase):
|
|||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
|
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.0, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.0, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ class TestPhi(unittest.TestCase):
|
|||||||
"tokenizer_type": "AutoTokenizer",
|
"tokenizer_type": "AutoTokenizer",
|
||||||
"sequence_len": 2048,
|
"sequence_len": 2048,
|
||||||
"sample_packing": False,
|
"sample_packing": False,
|
||||||
"load_in_4bit": True,
|
"load_in_8bit": False,
|
||||||
"adapter": "qlora",
|
"adapter": "qlora",
|
||||||
"lora_r": 64,
|
"lora_r": 64,
|
||||||
"lora_alpha": 32,
|
"lora_alpha": 32,
|
||||||
@@ -111,7 +111,6 @@ class TestPhi(unittest.TestCase):
|
|||||||
"bf16": "auto",
|
"bf16": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import unittest
|
|||||||
from axolotl.cli.args import TrainerCliArgs
|
from axolotl.cli.args import TrainerCliArgs
|
||||||
from axolotl.common.datasets import load_datasets
|
from axolotl.common.datasets import load_datasets
|
||||||
from axolotl.train import train
|
from axolotl.train import train
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.dict import DictDefault
|
from axolotl.utils.dict import DictDefault
|
||||||
|
|
||||||
from .utils import check_model_output_exists, check_tensorboard, with_temp_dir
|
from .utils import check_model_output_exists, check_tensorboard, with_temp_dir
|
||||||
@@ -57,7 +57,6 @@ class TestProcessRewardSmolLM2(unittest.TestCase):
|
|||||||
"seed": 42,
|
"seed": 42,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
cfg = validate_config(cfg)
|
|
||||||
normalize_config(cfg)
|
normalize_config(cfg)
|
||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|||||||
@@ -73,6 +73,6 @@ class TestRewardModelLoraSmolLM2(unittest.TestCase):
|
|||||||
|
|
||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
check_tensorboard(
|
check_tensorboard(
|
||||||
temp_dir + "/runs", "train/train_loss", 2.5, "Train Loss is too high"
|
temp_dir + "/runs", "train/train_loss", 2.5, "Train loss (%s) is too high"
|
||||||
)
|
)
|
||||||
check_model_output_exists(temp_dir, cfg)
|
check_model_output_exists(temp_dir, cfg)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from unittest.mock import patch
|
|||||||
import pytest
|
import pytest
|
||||||
from datasets import Dataset
|
from datasets import Dataset
|
||||||
|
|
||||||
from axolotl.utils.config import normalize_config, validate_config
|
from axolotl.utils.config import normalize_config
|
||||||
from axolotl.utils.data import prepare_dataset
|
from axolotl.utils.data import prepare_dataset
|
||||||
from axolotl.utils.data.rl import load_prepare_preference_datasets
|
from axolotl.utils.data.rl import load_prepare_preference_datasets
|
||||||
from axolotl.utils.data.utils import deduplicate_and_log_datasets
|
from axolotl.utils.data.utils import deduplicate_and_log_datasets
|
||||||
@@ -319,7 +319,6 @@ class TestDeduplicateNonRL(unittest.TestCase):
|
|||||||
"num_epochs": 1,
|
"num_epochs": 1,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
self.cfg_1 = validate_config(self.cfg_1)
|
|
||||||
normalize_config(self.cfg_1)
|
normalize_config(self.cfg_1)
|
||||||
|
|
||||||
@pytest.mark.skip(reason="TODO: fix hf hub offline to work with HF rate limits")
|
@pytest.mark.skip(reason="TODO: fix hf hub offline to work with HF rate limits")
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from transformers.models.auto.tokenization_auto import AutoTokenizer
|
|||||||
|
|
||||||
from axolotl.utils.callbacks.perplexity import Perplexity
|
from axolotl.utils.callbacks.perplexity import Perplexity
|
||||||
|
|
||||||
MODEL_NAME = "HuggingFaceTB/SmolLM2-135M"
|
MODEL_NAME = "axolotl-ai-internal/llama-7m"
|
||||||
|
|
||||||
|
|
||||||
@fixture()
|
@fixture()
|
||||||
@@ -36,7 +36,7 @@ One day, a little fish named Fin was swimming near the shore. He saw a big crab
|
|||||||
"""
|
"""
|
||||||
result = metric.compute(model, [sample_text])
|
result = metric.compute(model, [sample_text])
|
||||||
ppl = result["score"]
|
ppl = result["score"]
|
||||||
assert round(ppl, 2) == 7.41
|
assert round(ppl, 2) == 75.14
|
||||||
|
|
||||||
|
|
||||||
def test_perplexity_short(model, metric):
|
def test_perplexity_short(model, metric):
|
||||||
@@ -44,4 +44,4 @@ def test_perplexity_short(model, metric):
|
|||||||
sample_text = "Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun."
|
sample_text = "Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun."
|
||||||
result = metric.compute(model, [sample_text])
|
result = metric.compute(model, [sample_text])
|
||||||
ppl = result["score"]
|
ppl = result["score"]
|
||||||
assert round(ppl, 2) == 10.33
|
assert round(ppl, 2) == 70.54
|
||||||
|
|||||||
Reference in New Issue
Block a user