Compare commits

Comparing `preprocess` ... `llmcompres` (36 commits; the second ref name is truncated in the capture)
Commits in this range (author and date columns were empty in the capture):

b708a1cc45, daa9a58f83, ae7069e15b, 20d48cd617, 1447beb132, e766a730ba, 7dc797860e, ff4904c8c4, 45b7293793, 279c7178bc, e73c3709f9, 33562189f8, c057a2268f, 9d7a3809b5, b7b24d6a64, 8b82b8f7a1, 81da58c0a1, 2cd5a234a7, 8c1af0747d, a06b360d99, 0f6456a14f, 47a333ce49, f9d6776c28, 66f41ec6f1, 85053f4bd4, a4d5112ae1, 0d691cc2a7, c4053481ff, a6d28d19b1, 32e335dd51, 7651550850, 341e95aac9, b882dfb63f, b640db1dbc, 4ce469d32e, 60a8f0958d
**`.github/workflows/base.yml`** (12 changes, vendored)

```diff
@@ -46,6 +46,18 @@ jobs:
             python_version: "3.11"
             pytorch: 2.6.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+          - cuda: "126"
+            cuda_version: 12.6.3
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.7.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+          - cuda: "128"
+            cuda_version: 12.6.3
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.7.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
```
**`.github/workflows/main.yml`** (12 changes, vendored)

```diff
@@ -31,6 +31,11 @@ jobs:
             pytorch: 2.6.0
             axolotl_extras: vllm
             is_latest: true
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.7.0
+            axolotl_extras: vllm
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -93,6 +98,11 @@ jobs:
             pytorch: 2.6.0
             axolotl_extras:
             is_latest: true
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.7.0
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -138,7 +148,7 @@ jobs:
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
-            pytorch: 2.4.1
+            pytorch: 2.6.0
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
```
**`.github/workflows/multi-gpu-e2e.yml`** (8 changes, vendored)

```diff
@@ -45,6 +45,13 @@ jobs:
             axolotl_extras: vllm
             num_gpus: 2
             nightly_build: "true"
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.7.0
+            axolotl_extras:
+            num_gpus: 2
+            nightly_build: "true"
     runs-on: [self-hosted, modal]
     timeout-minutes: 120
     steps:
@@ -67,6 +74,7 @@ jobs:
           echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
           echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
           echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
+          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
       - name: Run tests job on Modal
         run: |
           modal run cicd.multigpu
```
**`.github/workflows/tests-nightly.yml`** (1 change, vendored)

```diff
@@ -147,6 +147,7 @@ jobs:
           echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
           echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
           echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
+          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
       - name: Run tests job on Modal
         run: |
           modal run cicd.e2e_tests
```
**`.github/workflows/tests.yml`** (11 changes, vendored)

```diff
@@ -49,7 +49,7 @@ jobs:
       max-parallel: 2
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.4.1", "2.5.1", "2.6.0"]
+        pytorch_version: ["2.4.1", "2.5.1", "2.6.0", "2.7.0"]
     timeout-minutes: 20
 
     steps:
@@ -109,6 +109,7 @@ jobs:
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v5
         with:
+          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml
          flags: unittests,pytorch-${{ matrix.pytorch_version }}
          fail_ci_if_error: false
@@ -241,6 +242,7 @@ jobs:
           echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
           echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
           echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
+          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
       - name: Run tests job on Modal
         run: |
           modal run cicd.e2e_tests
@@ -268,6 +270,12 @@ jobs:
             pytorch: 2.5.1
             num_gpus: 1
             axolotl_extras: vllm
+          - cuda: 126
+            cuda_version: 12.6.3
+            python_version: "3.11"
+            pytorch: 2.7.0
+            num_gpus: 1
+            axolotl_extras:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -288,6 +296,7 @@ jobs:
           echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
           echo "MODAL_IMAGE_BUILDER_VERSION=2024.10" >> $GITHUB_ENV
           echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
+          echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
       - name: Run tests job on Modal
         run: |
           modal run cicd.e2e_tests
```
**`cicd/cicd.sh`** (12 changes)

```diff
@@ -9,8 +9,7 @@ pytest -v --durations=10 -n8 \
   --ignore=tests/patched/ \
   --ignore=tests/cli \
   /workspace/axolotl/tests/ \
-  --cov=axolotl \
-  --cov-report=xml:coverage.xml
+  --cov=axolotl
 
 # Run lora kernels tests with coverage append
 pytest -v --durations=10 \
@@ -51,11 +50,6 @@ pytest -v --durations=10 \
   /workspace/axolotl/tests/e2e/ \
   --cov=axolotl \
   --cov-append \
-  --cov-report=xml:coverage.xml
+  --cov-report=xml:e2e-coverage.xml
 
-# Upload coverage to Codecov
-if [ -f e2e-coverage.xml ]; then
-  codecov -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION}
-else
-  echo "Coverage file not found. Coverage report may have failed."
-fi
+codecov upload-process -t $CODECOV_TOKEN -f e2e-coverage.xml -F e2e,pytorch-${PYTORCH_VERSION}
```
*(file path not captured in this view)*

```diff
@@ -28,6 +28,7 @@ df_args = {
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
     "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
+    "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
     "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }
 
```
*(file path not captured in this view)*

```diff
@@ -29,6 +29,7 @@ df_args = {
     "CUDA": os.environ.get("CUDA", "121"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
+    "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
     "HF_HOME": "/workspace/data/huggingface-cache/hub",
 }
 
```
*(file path not captured in this view; the multi-GPU test script)*

```diff
@@ -1,25 +1,23 @@
 #!/bin/bash
 set -e
 
-# only run one test at a time so as not to OOM the GPU
-pytest -v --durations=10 -n2 /workspace/axolotl/tests/e2e/multigpu/ --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
-pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/
-
 # Only run two tests at a time to avoid OOM on GPU (with coverage collection)
 pytest -v -n2 \
-  --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/
+  --ignore=/workspace/axolotl/tests/e2e/multigpu/solo/ \
+  --ignore=/workspace/axolotl/tests/e2e/multigpu/patched/ \
   /workspace/axolotl/tests/e2e/multigpu/ \
-  --cov=axolotl \
-  --cov-report=xml:multigpu-coverage.xml
+  --cov=axolotl
 
-pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/solo/ \
+# Run solo tests with coverage append
+pytest -v --durations=10 -n1 \
+  /workspace/axolotl/tests/e2e/multigpu/solo/ \
+  --cov=axolotl \
+  --cov-append
+
+pytest -v --durations=10 -n1 /workspace/axolotl/tests/e2e/multigpu/patched/ \
   --cov=axolotl \
   --cov-append \
   --cov-report=xml:multigpu-coverage.xml
 
 # Upload coverage to Codecov
-if [ -f multigpu-coverage.xml ]; then
-  codecov -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION}
-else
-  echo "Coverage file not found. Coverage report may have failed."
-fi
+codecov upload-process -t $CODECOV_TOKEN -f multigpu-coverage.xml -F multigpu,docker-tests,pytorch-${PYTORCH_VERSION}
```
*(file path not captured in this view; the Codecov configuration)*

```diff
@@ -49,3 +49,6 @@ comment:
   require_changes: no
   require_base: no
   require_head: yes
+
+github_checks:
+  annotations: false
```
*(file path not captured in this view; a Dockerfile)*

```diff
@@ -37,3 +37,7 @@ RUN git lfs install --skip-repo && \
     pip3 install awscli && \
     # The base image ships with `pydantic==1.8.2` which is not working
     pip3 install -U --no-cache-dir pydantic==1.10.10
+
+RUN if [ "$PYTORCH_VERSION" = "2.7.0" ] ; then \
+        pip3 install flash-attn==2.7.4.post1; \
+    fi
```
**`docs/cli.qmd`** (11 changes)

````diff
@@ -199,6 +199,17 @@ output_dir: # Directory to save evaluation results
 
 See [LM Eval Harness](https://github.com/EleutherAI/lm-evaluation-harness) for more details.
 
+### delinearize-llama4
+
+Delinearizes a Llama 4 linearized model into a regular HuggingFace Llama 4 model. This only works with the non-quantized linearized model.
+
+```bash
+axolotl delinearize-llama4 --model path/to/model_dir --output path/to/output_dir
+```
+
+This would be necessary to use with other frameworks. If you have an adapter, merge it with the non-quantized linearized model before delinearizing.
+
+
 ## Legacy CLI Usage
 
 While the new Click-based CLI is preferred, Axolotl still supports the legacy module-based CLI:
````
*(file path not captured in this view; the docs section-listing script)*

```diff
@@ -49,7 +49,8 @@ sections = [
     ("Knowledge Distillation (KD)", "kd"),
     ("Liger Kernels", "liger"),
     ("Language Model Evaluation Harness (LM Eval)", "lm_eval"),
-    ("Spectrum", "spectrum")
+    ("Spectrum", "spectrum"),
+    ("LLMCompressor", "llm_compressor")
 ]
 
 for section_name, folder_name in sections:
```
*(file path not captured in this view; the installation guide)*

````diff
@@ -19,6 +19,12 @@ This guide covers all the ways you can install and set up Axolotl for your environment.
 
 ## Installation Methods {#sec-installation-methods}
 
+::: {.callout-important}
+Please make sure to have Pytorch installed before installing Axolotl in your local environment.
+
+Follow the instructions at: [https://pytorch.org/get-started/locally/](https://pytorch.org/get-started/locally/)
+:::
+
 ### PyPI Installation (Recommended) {#sec-pypi}
 
 ```{.bash}
````
**`examples/glm4/qlora-32b.yaml`** (new file, 62 lines)

```yaml
base_model: THUDM/GLM-4-32B-0414
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name

load_in_4bit: true

datasets:
  - path: teknium/GPT4-LLM-Cleaned
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0
output_dir: ./outputs/qlora-out

adapter: qlora
lora_model_dir:

sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

lora_r: 16
lora_alpha: 32
lora_dropout: 0.05
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 2
num_epochs: 1
optimizer: adamw_8bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: auto
tf32: false

gradient_checkpointing: true
resume_from_checkpoint:
logging_steps: 1
flash_attention: true

loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
evals_per_epoch: 1
saves_per_epoch: 1
weight_decay: 0.0
special_tokens:
```
**`examples/llama-3/sparse-finetuning.yaml`** (new file, 77 lines)

```yaml
base_model: neuralmagic/Sparse-Llama-3.1-8B-2of4

plugins:
  - axolotl.integrations.llm_compressor.LLMCompressorPlugin

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: tatsu-lab/alpaca
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/out

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true
eval_sample_packing: false

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 8
micro_batch_size: 1
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2e-5

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
evals_per_epoch: 2
eval_table_size:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  pad_token: <|end_of_text|>

llmcompressor:
  recipe:
    finetuning_stage:
      finetuning_modifiers:
        ConstantPruningModifier:
          targets: [
            're:.*q_proj.weight',
            're:.*k_proj.weight',
            're:.*v_proj.weight',
            're:.*o_proj.weight',
            're:.*gate_proj.weight',
            're:.*up_proj.weight',
            're:.*down_proj.weight',
          ]
          start: 0
  save_compressed: true
```
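Per the plugin code later in this diff, `ConstantPruningModifier` does not prune anything new: it preserves the base model's 2:4 sparsity by cancelling updates to weights that started at zero. A framework-free sketch of that mechanic (ours, not llmcompressor's actual implementation):

```python
import torch

def capture_masks(model: torch.nn.Module) -> dict[str, torch.Tensor]:
    # Remember which weights are zero before training starts.
    return {n: (p != 0).float() for n, p in model.named_parameters() if "weight" in n}

def reapply_masks(model: torch.nn.Module, masks: dict[str, torch.Tensor]) -> None:
    # After optimizer.step(), cancel any update that revived a pruned weight.
    with torch.no_grad():
        for n, p in model.named_parameters():
            if n in masks:
                p.mul_(masks[n])
```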
*(file path not captured in this view; the Llama 4 examples README)*

````diff
@@ -26,3 +26,11 @@ Multi-GPU (4xH100) for Llama 4 Scout uses 62.8GB VRAM/GPU @ 4k contenxt length @
 ### Llama 4 Maverick 17Bx128Experts (400B)
 
 Coming Soon
+
+## Delinearized Llama 4 Models
+
+We provide a script to delinearize Llama 4 linearized models into regular HuggingFace Llama 4 models.
+
+```bash
+axolotl delinearize-llama4 --model path/to/model_dir --output path/to/output_dir
+```
````
*(file path not captured in this view; dev/test requirements)*

```diff
@@ -1,4 +1,5 @@
 codecov
+codecov-cli
 pytest
 pytest-cov
 pytest-retry
```
*(file path not captured in this view; the main requirements file)*

```diff
@@ -6,7 +6,7 @@ triton>=3.0.0
 mamba-ssm==1.2.0.post1
 xformers>=0.0.23.post1
 autoawq==0.2.7.post3
-liger-kernel==0.5.6
+liger-kernel==0.5.8
 # END section
 
 packaging==23.2
@@ -19,6 +19,7 @@ datasets==3.5.0
 deepspeed>=0.15.4
 trl==0.16.1
 hf_xet==1.0.0
+hqq==0.2.5
 
 optimum==1.16.2
 hf_transfer
```
**`setup.py`** (15 changes)

```diff
@@ -51,7 +51,7 @@ def parse_requirements(extras_require_map):
     try:
         torch_version = version("torch")
     except PackageNotFoundError:
-        torch_version = "2.5.1"
+        torch_version = "2.6.0"  # default to torch 2.6
     _install_requires.append(f"torch=={torch_version}")
 
     version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version)
@@ -64,9 +64,15 @@ def parse_requirements(extras_require_map):
     else:
         raise ValueError("Invalid version format")
 
-    if (major, minor) >= (2, 6):
+    if (major, minor) >= (2, 7):
         _install_requires.pop(_install_requires.index(xformers_version))
-        _install_requires.append("xformers==0.0.29.post2")
+        # _install_requires.append("xformers==0.0.29.post3")  # xformers seems to be hard pinned to 2.6.0
+        extras_require_map["vllm"] = ["vllm==0.8.3"]
+    elif (major, minor) >= (2, 6):
+        _install_requires.pop(_install_requires.index(xformers_version))
+        _install_requires.append(
+            "xformers==0.0.29.post2"
+        )  # vllm needs post2 w torch 2.6
         extras_require_map["vllm"] = ["vllm==0.8.3"]
     elif (major, minor) >= (2, 5):
         _install_requires.pop(_install_requires.index(xformers_version))
@@ -143,6 +149,9 @@ extras_require = {
     "vllm": [
         "vllm==0.7.2",
     ],
+    "llmcompressor": [
+        "llmcompressor==0.5.1",
+    ],
 }
 
 install_requires, dependency_links, extras_require_build = parse_requirements(
```
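The net effect of the `parse_requirements` change is a per-torch-version pin table. A minimal sketch of the resolution under the pins shown in the diff above (the helper name and return shape are ours, not the project's):

```python
def resolve_pins(torch_version: str) -> dict:
    """Sketch of the pin selection in setup.py after this change."""
    major, minor = (int(p) for p in torch_version.split(".")[:2])
    pins: dict = {"torch": torch_version}
    if (major, minor) >= (2, 7):
        # no xformers pin: upstream wheels were hard-pinned to torch 2.6 at the time
        pins["vllm_extra"] = "vllm==0.8.3"
    elif (major, minor) >= (2, 6):
        pins["xformers"] = "xformers==0.0.29.post2"  # vllm needs post2 with torch 2.6
        pins["vllm_extra"] = "vllm==0.8.3"
    return pins

assert resolve_pins("2.7.0") == {"torch": "2.7.0", "vllm_extra": "vllm==0.8.3"}
```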
*(file path not captured in this view; the CLI argument dataclasses)*

```diff
@@ -39,16 +39,16 @@ class TrainerCliArgs:
 class VllmServeCliArgs:
     """Dataclass with CLI arguments for `axolotl vllm-serve` command."""
 
-    tensor_parallel_size: int = field(
-        default=1,
+    tensor_parallel_size: Optional[int] = field(
+        default=None,
         metadata={"help": "Number of tensor parallel workers to use."},
     )
-    host: str = field(
-        default="0.0.0.0",  # nosec B104
+    host: Optional[str] = field(
+        default=None,  # nosec B104
         metadata={"help": "Host address to run the server on."},
     )
-    port: int = field(
-        default=8000,
+    port: Optional[int] = field(
+        default=None,
         metadata={"help": "Port to run the server on."},
     )
     gpu_memory_utilization: Optional[float] = field(
```
*(file path not captured in this view)*

```diff
@@ -129,19 +129,17 @@ def load_preference_datasets(
     total_num_steps = None
 
     if cli_args.debug or cfg.debug:
-        if not cfg.rl == "grpo":
-            LOG.info("check_dataset_labels...")
+        LOG.info("check_dataset_labels...")
 
-            tokenizer = load_tokenizer(cfg)
-            train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples)
-            check_dataset_labels(
-                train_samples,
-                tokenizer,
-                num_examples=cli_args.debug_num_examples,
-                text_only=cli_args.debug_text_only,
-                rl_mode=True,
-            )
+        tokenizer = load_tokenizer(cfg)
+        train_samples = sample_dataset(train_dataset, cli_args.debug_num_examples)
+        check_dataset_labels(
+            train_samples,
+            tokenizer,
+            num_examples=cli_args.debug_num_examples,
+            text_only=cli_args.debug_text_only,
+            rl_mode=True,
+        )
 
     return TrainDatasetMeta(
         train_dataset=train_dataset,
```
*(file path not captured in this view; the trainer builder)*

```diff
@@ -1040,9 +1040,11 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
         if self.cfg.dataset_processes:
             training_args_kwargs["dataset_num_proc"] = self.cfg.dataset_processes
 
-        if (self.cfg.trl and self.cfg.trl.beta) or self.cfg.rl_beta:
-            training_args_kwargs["beta"] = self.cfg.trl.beta or self.cfg.rl_beta
-        if self.cfg.orpo_alpha:
+        if self.cfg.trl and self.cfg.trl.beta is not None:
+            training_args_kwargs["beta"] = self.cfg.trl.beta
+        elif self.cfg.rl_beta is not None:
+            training_args_kwargs["beta"] = self.cfg.rl_beta
+        elif self.cfg.orpo_alpha is not None:
             # trl does some odd mapping of alpha to beta to reuse the beta parameter ???
             training_args_kwargs["beta"] = self.cfg.orpo_alpha
```
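The rewritten branch is more than style: the old truthiness checks could not distinguish an explicit `beta: 0.0` from an unset value. A minimal sketch of the difference (helper names are ours):

```python
# Why `is not None` matters: beta = 0.0 is a legitimate setting, but it is falsy.
def pick_beta_old(trl_beta, rl_beta):
    if trl_beta or rl_beta:            # 0.0 fails this check
        return trl_beta or rl_beta     # and `or` would skip 0.0 anyway
    return "unset"

def pick_beta_new(trl_beta, rl_beta):
    if trl_beta is not None:
        return trl_beta
    if rl_beta is not None:
        return rl_beta
    return "unset"

assert pick_beta_old(0.0, None) == "unset"    # old behavior: explicit 0.0 dropped
assert pick_beta_new(0.0, None) == 0.0        # new behavior: 0.0 respected
```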
*(file path not captured in this view; the GRPO strategy)*

```diff
@@ -40,8 +40,8 @@ class GRPOStrategy:
 
         if trl.use_vllm:
             grpo_args_kwargs["use_vllm"] = trl.use_vllm
-            grpo_args_kwargs["vllm_server_host"] = trl.vllm_server_host
-            grpo_args_kwargs["vllm_server_port"] = trl.vllm_server_port
+            grpo_args_kwargs["vllm_server_host"] = trl.vllm_server_host or trl.vllm.host
+            grpo_args_kwargs["vllm_server_port"] = trl.vllm_server_port or trl.vllm.port
             if trl.vllm_server_timeout:
                 grpo_args_kwargs["vllm_server_timeout"] = trl.vllm_server_timeout
             if trl.vllm_guided_decoding_regex:
```
*(file path not captured in this view; the cut_cross_entropy README)*

```diff
@@ -47,6 +47,8 @@ cut_cross_entropy: true
 - qwen2
 - cohere
 - cohere2
+- glm
+- glm4
 
 ## Citation
 
```
New file (57 lines; the file header was not captured, but the import added in the next file references it as `axolotl.integrations.cut_cross_entropy.monkeypatch.glm4`):

```python
"""GLM 4 patch. GLM family inherits from Llama."""

from types import MethodType

import transformers
from cut_cross_entropy.transformers.utils import (
    PatchOptions,
    TransformersModelT,
)


def patch_glm(
    maybe_model: TransformersModelT | str | transformers.PretrainedConfig,
    patch_options: PatchOptions,
) -> TransformersModelT | None:
    # Set the _PATCH_OPTS in the llama patch file
    import cut_cross_entropy.transformers.llama as llama_patch

    llama_patch._PATCH_OPTS = patch_options  # pylint: disable=protected-access

    from cut_cross_entropy.transformers.llama import cce_forward
    from transformers.models.glm import modeling_glm

    if isinstance(maybe_model, transformers.PreTrainedModel):
        assert isinstance(
            maybe_model, modeling_glm.GlmForCausalLM
        ), f"Expected a GlmForCausalLM model. Got {type(maybe_model)}."
        maybe_model.forward = MethodType(cce_forward, maybe_model)
        return maybe_model

    modeling_glm.GlmForCausalLM.forward = cce_forward
    return None


def patch_glm4(
    maybe_model: TransformersModelT | str | transformers.PretrainedConfig,
    patch_options: PatchOptions,
) -> TransformersModelT | None:
    # Set the _PATCH_OPTS in the llama patch file
    import cut_cross_entropy.transformers.llama as llama_patch

    llama_patch._PATCH_OPTS = patch_options  # pylint: disable=protected-access

    from cut_cross_entropy.transformers.llama import cce_forward
    from transformers.models.glm4 import modeling_glm4

    if isinstance(maybe_model, transformers.PreTrainedModel):
        assert isinstance(
            maybe_model, modeling_glm4.Glm4ForCausalLM
        ), f"Expected a Glm4ForCausalLM model. Got {type(maybe_model)}."
        maybe_model.forward = MethodType(cce_forward, maybe_model)
        return maybe_model

    modeling_glm4.Glm4ForCausalLM.forward = cce_forward
    return None
```
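Both functions use the same two-mode patching pattern: bind the replacement `forward` onto one already-constructed instance via `MethodType`, or overwrite the method on the class so all future instantiations pick it up. A standalone sketch of that pattern (illustrative names throughout):

```python
from types import MethodType

class Model:
    def forward(self, x):
        return x

def fast_forward(self, x):
    # stand-in for cce_forward
    return x * 2

m = Model()
m.forward = MethodType(fast_forward, m)  # patch this instance only
assert m.forward(3) == 6
assert Model().forward(3) == 3           # other instances untouched

Model.forward = fast_forward             # patch the class: affects new instances
assert Model().forward(3) == 6
```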
*(file path not captured in this view; the cut_cross_entropy patch registry)*

```diff
@@ -20,6 +20,10 @@ from axolotl.integrations.cut_cross_entropy.monkeypatch.gemma3 import (
     patch_gemma3,
     patch_gemma3_text,
 )
+from axolotl.integrations.cut_cross_entropy.monkeypatch.glm4 import (
+    patch_glm,
+    patch_glm4,
+)
 from axolotl.integrations.cut_cross_entropy.monkeypatch.llama4 import (
     patch_llama4,
     patch_llama4_text,
@@ -45,6 +49,8 @@ CUT_CROSS_ENTROPY_MODEL_MAPPING = {
     "qwen2": patch_qwen2,
     "cohere": patch_cohere,
     "cohere2": patch_cohere2,
+    "glm": patch_glm,
+    "glm4": patch_glm4,
 }
 
```
*(file path not captured in this view; the Liger README)*

```diff
@@ -25,7 +25,7 @@ liger_fused_linear_cross_entropy: true
 - deepseek_v2
 - gemma
 - gemma2
-- gemma3 (partial support, no support for FLCE yet)
+- gemma3
 - granite
 - jamba
 - llama
```
*(file path not captured in this view; the Liger plugin)*

```diff
@@ -21,7 +21,6 @@ It is designed to be performant, correct, and light-weight.
 import inspect
 import logging
 import sys
-from functools import partial
 
 from axolotl.integrations.base import BasePlugin
 
@@ -55,7 +54,6 @@ class LigerPlugin(BasePlugin):
         )
         from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss
         from liger_kernel.transformers.functional import liger_cross_entropy
-        from liger_kernel.transformers.geglu import LigerGEGLUMLP
         from liger_kernel.transformers.layer_norm import LigerLayerNorm
         from liger_kernel.transformers.monkey_patch import MODEL_TYPE_TO_APPLY_LIGER_FN
         from liger_kernel.transformers.rms_norm import LigerRMSNorm
@@ -141,38 +139,6 @@ class LigerPlugin(BasePlugin):
             modeling_mod.CrossEntropyLoss = LigerCrossEntropyLoss
             if cfg.liger_fused_linear_cross_entropy:
                 modeling_mod.DeepseekV2ForCausalLM.forward = deepseekv2_lce_forward
-        elif cfg.model_config_type in ["gemma3", "gemma3_text"]:
-            from transformers.models.gemma3 import modeling_gemma3
-
-            if cfg.liger_rope:
-                modeling_gemma3.apply_rotary_pos_emb = liger_rotary_pos_emb
-            if cfg.liger_rms_norm:
-
-                def _liger_rms_norm_wrapper(dim, **kwargs):
-                    "Convert 'dim' keyword to 'hidden_size' to pass to LigerRMSNorm"
-                    return LigerRMSNorm(hidden_size=dim, **kwargs)
-
-                modeling_gemma3.Gemma3RMSNorm = partial(
-                    _liger_rms_norm_wrapper,
-                    offset=1.0,
-                    casting_mode="gemma",
-                    init_fn="zeros",
-                    in_place=False,
-                )
-            if cfg.liger_glu_activation:
-                modeling_gemma3.Gemma3MLP = LigerGEGLUMLP
-            if cfg.liger_layer_norm:
-                modeling_gemma3.nn.LayerNorm = LigerLayerNorm
-
-            if cfg.liger_cross_entropy:
-                from transformers.loss.loss_utils import nn
-
-                nn.functional.cross_entropy = liger_cross_entropy
-
-            if cfg.liger_fused_linear_cross_entropy:
-                raise NotImplementedError(
-                    "Fused linear cross entropy is not yet supported for Gemma3."
-                )
         elif cfg.model_config_type == "llama4":
             from axolotl.integrations.liger.models.llama4 import (
                 apply_liger_kernel_to_llama4,
```
**`src/axolotl/integrations/llm_compressor/README.md`** (new file, 108 lines)

````markdown
# LLMCompressor Integration

Fine-tune sparsified models in Axolotl using Neural Magic's [LLMCompressor](https://github.com/vllm-project/llm-compressor).

This integration enables fine-tuning of models sparsified using LLMCompressor within the Axolotl training framework. By combining LLMCompressor's model compression capabilities with Axolotl's distributed training pipelines, users can efficiently fine-tune sparse models at scale.

It uses Axolotl's plugin system to hook into the fine-tuning flows while maintaining sparsity throughout training.

---

## Requirements

- Axolotl with `llmcompressor` extras:

  ```bash
  pip install "axolotl[llmcompressor]"
  ```

- Requires `llmcompressor >= 0.5.1`

This will install all necessary dependencies to fine-tune sparsified models using the integration.

---

## Usage

To enable sparse fine-tuning with this integration, include the plugin in your Axolotl config:

```yaml
plugins:
  - axolotl.integrations.llm_compressor.LLMCompressorPlugin

llmcompressor:
  recipe:
    finetuning_stage:
      finetuning_modifiers:
        ConstantPruningModifier:
          targets: [
            're:.*q_proj.weight',
            're:.*k_proj.weight',
            're:.*v_proj.weight',
            're:.*o_proj.weight',
            're:.*gate_proj.weight',
            're:.*up_proj.weight',
            're:.*down_proj.weight',
          ]
          start: 0
  save_compressed: true

# ... (other training arguments)
```

This plugin **does not apply pruning or sparsification itself** — it is intended for **fine-tuning models that have already been sparsified**.

Pre-sparsified checkpoints can be:

- Generated using [LLMCompressor](https://github.com/vllm-project/llm-compressor)
- Downloaded from [Neural Magic's Hugging Face page](https://huggingface.co/neuralmagic)
- Any custom LLM with compatible sparsity patterns that you've created yourself

To learn more about writing and customizing LLMCompressor recipes, refer to the official documentation:
[https://github.com/vllm-project/llm-compressor/blob/main/README.md](https://github.com/vllm-project/llm-compressor/blob/main/README.md)

### Storage Optimization with save_compressed

Setting `save_compressed: true` in your configuration enables saving models in a compressed format, which:

- Reduces disk space usage by approximately 40%
- Maintains compatibility with vLLM for accelerated inference
- Maintains compatibility with llmcompressor for further optimization (example: quantization)

This option is highly recommended when working with sparse models to maximize the benefits of model compression.

### Example Config

See [`examples/llama-3/sparse-finetuning.yaml`](examples/llama-3/sparse-finetuning.yaml) for a complete example.

---

## Inference with vLLM

After fine-tuning your sparse model, you can leverage vLLM for efficient inference. You can also use LLMCompressor to apply additional quantization to your fine-tuned sparse model before inference for even greater performance benefits:

```python
from vllm import LLM, SamplingParams

prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm = LLM("path/to/your/sparse/model")
outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
```

For more details on vLLM's capabilities and advanced configuration options, see the [official vLLM documentation](https://docs.vllm.ai/).

## Learn More

For details on available sparsity and quantization schemes, fine-tuning recipes, and usage examples, visit the official LLMCompressor repository:
[https://github.com/vllm-project/llm-compressor](https://github.com/vllm-project/llm-compressor)
````
**`src/axolotl/integrations/llm_compressor/__init__.py`** (new file, 5 lines)

```python
"""Integration entry point for the LLMCompressor plugin."""

from .plugin import LLMCompressorPlugin

__all__ = ["LLMCompressorPlugin"]
```
**`src/axolotl/integrations/llm_compressor/args.py`** (new file, 40 lines)

```python
"""
LLMCompressor and Sparse Finetuning config models.
"""

from typing import Any

from pydantic import BaseModel, Field
from typing_extensions import Annotated


class CompressionArgs(BaseModel):
    """Sparse Finetuning config for LLMCompressor."""

    # Typing for recipe is set to Any due to:
    # https://github.com/vllm-project/llm-compressor/issues/1319
    recipe: Annotated[
        Any,
        Field(
            description="The recipe containing the compression algorithms and hyperparameters to apply."
        ),
    ]

    save_compressed: Annotated[
        bool,
        Field(
            default=False,
            description="Whether to save the compressed model after training.",
        ),
    ]


class LLMCompressorArgs(BaseModel):
    """LLMCompressor configuration BaseModel."""

    llmcompressor: Annotated[
        CompressionArgs,
        Field(
            description="Arguments enabling compression pathways through the LLM Compressor plugins"
        ),
    ]
```
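Given the YAML from the README's Usage section, the `llmcompressor:` mapping is validated against these models. A minimal sketch of that round trip (the import path follows this diff; the recipe contents are illustrative):

```python
from axolotl.integrations.llm_compressor.args import LLMCompressorArgs

args = LLMCompressorArgs(
    llmcompressor={
        "recipe": {
            "finetuning_stage": {
                "finetuning_modifiers": {
                    "ConstantPruningModifier": {
                        "targets": ["re:.*q_proj.weight"],
                        "start": 0,
                    }
                }
            }
        },
        "save_compressed": True,
    }
)
assert args.llmcompressor.save_compressed is True  # recipe stays an untyped mapping
```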
**`src/axolotl/integrations/llm_compressor/plugin.py`** (new file, 171 lines)

```python
"""
Sparse Finetuning plugin for Axolotl — enables handling of sparse neural networks
by maintaining masks for zero weights during training.
"""

import logging
from functools import wraps
from typing import Any, Callable, Concatenate, ParamSpec, TypeVar

from llmcompressor import active_session, create_session
from llmcompressor.core import callbacks as session_callbacks
from llmcompressor.recipe import Recipe
from torch.nn import Module
from transformers.trainer import Trainer
from transformers.trainer_callback import TrainerCallback, TrainerControl, TrainerState
from transformers.training_args import TrainingArguments

from axolotl.integrations.base import BasePlugin

P = ParamSpec("P")  # Params for generic function signatures
R = TypeVar("R")  # Return type for generic function signatures

LOG = logging.getLogger("axolotl.integrations.llm_compressor")


class LLMCompressorCallbackHandler(TrainerCallback):
    """
    Trainer callback for Sparse Finetuning.
    Maintains sparsity patterns during training by applying masks after optimization steps,
    ensuring zero-weight updates are canceled out.
    """

    def __init__(self, trainer: Trainer, recipe: Any):
        """
        Initialize the Sparse Finetuning callback handler.

        Args:
            trainer (Trainer): Huggingface Trainer instance.
            recipe (Recipe | dict): Sparse finetuning recipe to apply.
        """
        super().__init__()
        self.trainer = trainer
        self.recipe = (
            Recipe.model_validate(recipe) if not isinstance(recipe, Recipe) else recipe
        )
        self.original_compute_loss = trainer.compute_loss
        self.trainer.compute_loss = compute_loss_wrapper(self.trainer.compute_loss)
        create_session()

    def on_train_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the beginning of training. Initializes the compression session.

        Args:
            args (TrainingArguments): Training arguments.
            state (TrainerState): Trainer state.
            control (TrainerControl): Trainer control.
        """
        super().on_train_begin(args, state, control, **kwargs)
        self.trainer.accelerator.wait_for_everyone()
        active_session().initialize(
            model=self.trainer.model,
            optimizer=self.trainer.optimizer,
            start=state.epoch,
            recipe=self.recipe,
        )
        self.trainer.accelerator.wait_for_everyone()

    def on_step_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the beginning of a training step. Triggers batch_start callback.
        """
        super().on_step_begin(args, state, control, **kwargs)
        session_callbacks.batch_start()

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the end of a training step. Triggers optimizer and batch_end callbacks.
        """
        super().on_step_end(args, state, control, **kwargs)
        session_callbacks.optim_pre_step()
        session_callbacks.optim_post_step()
        session_callbacks.batch_end()

    def on_train_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ) -> None:
        """
        Called at the end of training. Finalizes the compression session.
        """
        super().on_train_end(args, state, control, **kwargs)
        active_session().finalize()
        self.trainer.compute_loss_func = self.original_compute_loss


class LLMCompressorPlugin(BasePlugin):
    """
    Sparse Finetuning plugin for Axolotl integration.
    """

    def get_input_args(self) -> str:
        """
        Returns the path to the plugin's argument definition.

        Returns:
            str: Dotted path to the LLMCompressorArgs class.
        """
        return "axolotl.integrations.llm_compressor.args.LLMCompressorArgs"

    def add_callbacks_post_trainer(self, cfg: Any, trainer: Trainer) -> list:
        """
        Adds Sparse Finetuning callback to the Trainer instance.

        Args:
            cfg (Any): Configuration object containing the sparse recipe.
            trainer (Trainer): Huggingface Trainer instance.

        Returns:
            list: List containing the configured callback instances.
        """
        LOG.info("Adding Sparse Finetuning callback to the trainer")
        callback = LLMCompressorCallbackHandler(
            trainer=trainer,
            recipe=cfg.llmcompressor.recipe,
        )
        return [callback]


def compute_loss_wrapper(
    compute_loss_func: Callable[Concatenate[Module, P], R],
) -> Callable[Concatenate[Module, P], R]:
    """
    Wraps the loss computation function to trigger the loss_calculated callback.

    Args:
        compute_loss_func (Callable): Original loss computation function.

    Returns:
        Callable: Wrapped function that also invokes the loss_calculated callback.
    """

    @wraps(compute_loss_func)
    def compute_and_notify(model: Module, *args: P.args, **kwargs: P.kwargs) -> R:
        loss = compute_loss_func(model, *args, **kwargs)
        if active_session().lifecycle.initialized_ and model.training:
            session_callbacks.loss_calculated(loss=loss)
        return loss

    return compute_and_notify
```
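The handler drives llmcompressor's session through standard HF `TrainerCallback` hooks. Stripped of the Trainer, the per-step choreography it wires up looks roughly like the loop below; this is a sketch for clarity, not the project's code, and it assumes an HF-style model whose output carries `.loss`. The session API names are the same ones the plugin imports:

```python
from llmcompressor import active_session, create_session
from llmcompressor.core import callbacks as session_callbacks

def sparse_finetune(model, optimizer, dataloader, recipe, num_epochs=1):
    create_session()
    active_session().initialize(model=model, optimizer=optimizer, start=0, recipe=recipe)
    for _ in range(num_epochs):
        for batch in dataloader:
            session_callbacks.batch_start()               # plugin: on_step_begin
            loss = model(**batch).loss
            session_callbacks.loss_calculated(loss=loss)  # plugin: compute_loss wrapper
            loss.backward()
            session_callbacks.optim_pre_step()            # plugin: on_step_end
            optimizer.step()
            session_callbacks.optim_post_step()           # sparsity masks re-applied here
            optimizer.zero_grad()
            session_callbacks.batch_end()
    active_session().finalize()                           # plugin: on_train_end
```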
**`src/axolotl/integrations/llm_compressor/utils.py`** (new file, 40 lines)

```python
"""Utilities for llmcompressor integration with axolotl."""

from typing import Union

from llmcompressor.transformers.sparsification.compressed_tensors_utils import (
    modify_save_pretrained,
)
from transformers import PreTrainedModel, Trainer


def save_compressed_model(
    model: PreTrainedModel,
    output_dir: Union[str, bytes],
    trainer: Trainer,
    safe_serialization: bool = False,
    save_compressed: bool = False,
) -> None:
    """
    Synchronize processes, apply compression hooks, and save the model.

    Args:
        model (PreTrainedModel): The model to be saved.
        output_dir (str or bytes): Path where the model files will be written.
        trainer (Trainer): Hugging Face Trainer for process synchronization.
        safe_serialization (bool): Use safe serialization if True.
        save_compressed (bool): Write compressed tensors if True.
    """
    trainer.accelerator.wait_for_everyone()

    # Only the main process writes the files
    if not trainer.accelerator.is_main_process:
        return

    modify_save_pretrained(model)
    model.save_pretrained(
        output_dir,
        safe_serialization=safe_serialization,
        save_compressed=save_compressed,
        skip_sparsity_compression_stats=not save_compressed,
    )
```
*(file path not captured in this view; the multipack support list)*

```diff
@@ -31,6 +31,8 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
     "starcoder2",
     "deepseek_v2",
     "deepseek_v3",
+    "glm",
+    "glm4",
 ]
 
```
*(file path not captured in this view; the ReLoRA scheduler)*

```diff
@@ -272,7 +272,7 @@ class ReLoRAScheduler(LRScheduler):
         self.warmup_steps = warmup_steps
         self.anneal_steps = anneal_steps
         self.min_lr_scale = min_lr_scale
-        super().__init__(optimizer, inner_schedule.last_epoch, inner_schedule.verbose)
+        super().__init__(optimizer, inner_schedule.last_epoch)
 
     def get_lr(self) -> float:
         self.inner_schedule.last_epoch = self.last_epoch
```
*(file path not captured in this view)*

```diff
@@ -4,73 +4,30 @@ module for base dataset transform strategies
 
 import importlib
 import logging
-import sys
 
 LOG = logging.getLogger("axolotl")
 
 
-def import_from_path(module_name: str, file_path: str):
-    """
-    Import a module from a file path.
-
-    Args:
-        module_name: Name of the module.
-        file_path: Path to the file.
-
-    Returns:
-        module: The imported module.
-    """
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Could not create module spec for: {file_path}")
-    module = importlib.util.module_from_spec(spec)
-
-    sys.modules[module_name] = module
-    loader = importlib.machinery.SourceFileLoader(module_name, file_path)
-    spec.loader = loader
-    loader.exec_module(module)
-    return module
-
-
 def load(strategy, cfg, module_base=None, **kwargs):
-    if len(strategy.split(".")) == 1:
-        strategy = strategy + ".default"
-    load_fn = strategy.split(".")[-1]
-    func = None
-    if len(strategy.split(".")) > 1:
-        try:
-            mod = importlib.import_module(
-                strategy.split(".")[-2],
-                ".".join(strategy.split(".")[:-2]),
-            )
-            func = getattr(mod, load_fn)
-            return func(cfg, **kwargs)
-        except ModuleNotFoundError:
-            pass
-        try:
-            mod = importlib.import_module(
-                "." + ".".join(strategy.split(".")[:-1]), module_base
-            )
-            func = getattr(mod, load_fn)
-            return func(cfg, **kwargs)
-        except ModuleNotFoundError:
-            pass
-        try:
-            file_path = "/".join(strategy.split(".")[:-1]) + ".py"
-            module_name = strategy.split(".")[-2]
-            mod = import_from_path(module_name, file_path)
-            func = getattr(mod, load_fn)
-            if func is not None:
-                return func(cfg, **kwargs)
-        except FileNotFoundError:
-            pass
-    else:
-        strategy = "." + ".".join(strategy.split(".")[:-1])
-    mod = importlib.import_module(strategy, module_base)
-    func = getattr(mod, load_fn)
-    return func(cfg, **kwargs)
-    LOG.warning(f"unable to load strategy {strategy}")
-    return func
+    try:
+        if len(strategy.split(".")) == 1:
+            strategy = strategy + ".default"
+        load_fn = strategy.split(".")[-1]
+        if len(strategy.split(".")) > 1:
+            try:
+                importlib.import_module(
+                    strategy.split(".")[-2],
+                    ".".join(strategy.split(".")[:-2]),
+                )
+                module_base = ".".join(strategy.split(".")[:-2])
+                strategy = strategy.split(".")[-2]
+            except ModuleNotFoundError:
+                strategy = "." + ".".join(strategy.split(".")[:-1])
+        else:
+            strategy = "." + ".".join(strategy.split(".")[:-1])
+        mod = importlib.import_module(strategy, module_base)
+        func = getattr(mod, load_fn)
+        return func(cfg, **kwargs)
+    except Exception:  # pylint: disable=broad-exception-caught
+        LOG.warning(f"unable to load strategy {strategy}")
+        return None
```
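After the refactor, every failure path funnels into one broad `except` that logs a warning and returns `None` (the old version had unreachable warning/return lines after its final `return`). A hedged sketch of the contract a caller now relies on; the strategy and package names here are hypothetical:

```python
# A dotted strategy name resolves to module + function ("pkg.strategies.custom"),
# and a bare name is first expanded with ".default".
result = load("my_pkg.my_strategies.custom", cfg, module_base="my_pkg")  # hypothetical names
if result is None:
    raise ValueError("strategy could not be loaded; see the warning logged above")
```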
*(file path not captured in this view; the training entry point)*

```diff
@@ -271,6 +271,19 @@ def save_trained_model(
             os.remove(os.path.join(cfg.output_dir, "model.safetensors"))
         except FileNotFoundError:
             pass
+    elif hasattr(cfg, "llmcompressor") and cfg.llmcompressor:
+        from axolotl.integrations.llm_compressor.utils import (
+            save_compressed_model,
+        )
+
+        save_compressed_model(
+            model=model,
+            output_dir=cfg.output_dir,
+            trainer=trainer,
+            safe_serialization=safe_serialization,
+            save_compressed=cfg.llmcompressor.save_compressed,
+        )
+
     elif cfg.local_rank == 0:
         if cfg.flash_optimum and BetterTransformer:
             model = BetterTransformer.reverse(model)
@@ -279,6 +292,7 @@ def save_trained_model(
             trainer.model.save_pretrained(
                 cfg.output_dir, safe_serialization=safe_serialization
             )
+
         model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
 
```
|
|||||||
import functools
|
import functools
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
|
|
||||||
@@ -117,9 +118,27 @@ def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None):
|
|||||||
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
||||||
)
|
)
|
||||||
|
|
||||||
iter_ds = load_dataset(
|
# when letting accelerator dispatch batches from the main process, we don't need to load the dataset from
|
||||||
path, streaming=True, split=split, name=name, data_files=data_files
|
# other ranks, we just need to present a fake dataset
|
||||||
)
|
if (
|
||||||
|
cfg.accelerator_config
|
||||||
|
and cfg.accelerator_config.dispatch_batches
|
||||||
|
and not is_local_main_process()
|
||||||
|
):
|
||||||
|
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
|
||||||
|
f.write("text\n")
|
||||||
|
f.write("lorem ipsum dolor sit amet\n")
|
||||||
|
# rewind the file pointer to the beginning so we can read it again
|
||||||
|
f.seek(0)
|
||||||
|
iter_ds = load_dataset(
|
||||||
|
"csv", data_files=f.name, split="train", streaming=True
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
if is_local_main_process():
|
||||||
|
iter_ds = load_dataset(
|
||||||
|
path, streaming=True, split=split, name=name, data_files=data_files
|
||||||
|
)
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
LOG.info(f"Skipping {skip} samples from the dataset")
|
LOG.info(f"Skipping {skip} samples from the dataset")
|
||||||
iter_ds = iter_ds.skip(skip)
|
iter_ds = iter_ds.skip(skip)
|
||||||
|
|||||||
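The comment in the hunk captures the idea: with `accelerator_config.dispatch_batches`, only the main process reads real data and dispatches batches to the other ranks, so non-main ranks just need a syntactically valid placeholder dataset. A self-contained sketch of the same trick, using only the `datasets` library:

import tempfile

from datasets import load_dataset


def placeholder_streaming_dataset():
    """Tiny single-column dataset for ranks that never read real data.

    Same trick as prepare_dataset above: write a two-line CSV
    ("text" header plus one row) and stream it back with load_dataset.
    """
    with tempfile.NamedTemporaryFile(mode="w+", suffix=".csv", delete=False) as f:
        f.write("text\n")
        f.write("lorem ipsum dolor sit amet\n")
        f.seek(0)
        return load_dataset("csv", data_files=f.name, split="train", streaming=True)


ds = placeholder_streaming_dataset()
print(next(iter(ds)))  # {'text': 'lorem ipsum dolor sit amet'}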
@@ -139,6 +139,22 @@ def check_model_config(cfg: DictDefault, model_config: PretrainedConfig):
         hasattr(model_config, "quantization_config")
         and model_config.quantization_config
     )
+
+    # Detect compressed-tensors config
+    is_compressed_tensors_config = (
+        quant_config_exists
+        and model_config.quantization_config.get("quant_method") == "compressed-tensors"
+    )
+
+    if is_compressed_tensors_config:
+        if model_config.quantization_config.get("config_groups"):
+            LOG.warning(
+                "Found `config_groups` in a compressed-tensors config. "
+                "QAT integration with llmcompressor is not tested."
+            )
+        # Skip further quant checks for compressed-tensors
+        return
+
     quant_config_method_is_gptq = (
         quant_config_exists
         and "quant_method" in model_config.quantization_config
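For background, `quantization_config` on a Hugging Face config loaded from the hub is a plain dict, and checkpoints produced via llm-compressor report `"quant_method": "compressed-tensors"`. A quick hedged sketch of what the new check keys on (the model name is one of the checkpoints the new e2e test uses):

from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "nm-testing/llama2.c-stories42M-pruned2.4-compressed"
)
quant_cfg = getattr(config, "quantization_config", None) or {}
if quant_cfg.get("quant_method") == "compressed-tensors":
    print("compressed-tensors checkpoint: GPTQ/bitsandbytes checks don't apply")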
@@ -40,7 +40,7 @@ class RexLR(LRScheduler):
         self.max_lr = max_lr
         self.total_steps = total_steps
         self.num_warmup_steps = num_warmup_steps
-        self.last_step = last_step - 1
+        self.last_step = max(last_step - 1, 0)
 
         # Ensure each parameter group has an "initial_lr" key to avoid issues when resuming.
         for group in optimizer.param_groups:
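The clamp fixes an off-by-one on fresh runs: with the default `last_step=0`, the old code stored `-1`, starting the schedule one step "before" step zero. Resumed runs (`last_step >= 1`) are unaffected:

# only the fresh-run case (last_step=0) changes; resumes are identical
for last_step in (0, 1, 500):
    print(last_step, last_step - 1, max(last_step - 1, 0))
# 0 -1 0   <- old code started one step before the schedule
# 1 0 0
# 500 499 499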
@@ -660,6 +660,7 @@ class AxolotlInputConfig(
             data.get("val_set_size") == 0
             and (data.get("eval_steps") or data.get("eval_strategy"))
             and not data.get("test_datasets")
+            and data.get("eval_strategy") != "no"
         ):
             raise ValueError(
                 "eval_steps and eval_strategy are not supported with val_set_size == 0"
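The extra clause stops the validator from rejecting configs that set `eval_strategy: "no"` explicitly, which is just a verbose way of disabling evaluation and is perfectly compatible with `val_set_size: 0`. A minimal sketch of the predicate before and after:

# sketch of the validator predicate, before/after this change
def rejects(data: dict, patched: bool) -> bool:
    cond = (
        data.get("val_set_size") == 0
        and (data.get("eval_steps") or data.get("eval_strategy"))
        and not data.get("test_datasets")
    )
    if patched:
        cond = cond and data.get("eval_strategy") != "no"
    return bool(cond)


cfg = {"val_set_size": 0, "eval_strategy": "no"}
print(rejects(cfg, patched=False))  # True  -> used to raise ValueError
print(rejects(cfg, patched=True))   # False -> now accepted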
@@ -36,3 +36,11 @@ class VllmConfig(BaseModel):
         default=None,
         json_schema_extra={"description": "Enable prefix caching for VLLM"},
     )
+    host: str | None = Field(
+        default="0.0.0.0",  # nosec B104
+        json_schema_extra={"description": "Host for the vLLM server to start on"},
+    )
+    port: int | None = Field(
+        default=8000,
+        json_schema_extra={"description": "Port of the vLLM server to start on"},
+    )
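Since `VllmConfig` is a pydantic `BaseModel`, the new fields behave like any other: defaults apply when omitted and values are validated and coerced. A small standalone sketch of the same field shape (not the axolotl class itself):

from pydantic import BaseModel, Field


class VllmConfigSketch(BaseModel):
    # same shape as the fields added above
    host: str | None = Field(default="0.0.0.0")  # nosec B104
    port: int | None = Field(default=8000)


print(VllmConfigSketch())                  # host='0.0.0.0' port=8000
print(VllmConfigSketch(port="8001").port)  # 8001 -- pydantic coerces the string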
@@ -193,6 +193,14 @@ def download_tiny_shakespeare_dataset():
     snapshot_download_w_retry("winglian/tiny-shakespeare", repo_type="dataset")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_evolkit_kd_sample_dataset():
+    # download the dataset
+    snapshot_download_w_retry(
+        "axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample", repo_type="dataset"
+    )
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_deepseek_model_fixture():
     snapshot_download_w_retry("axolotl-ai-co/DeepSeek-V3-11M", repo_type="model")
@@ -208,6 +216,16 @@ def download_huggyllama_model_fixture():
     )
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_llama33_70b_model_fixture():
+    # download the tokenizer only
+    snapshot_download_w_retry(
+        "axolotl-ai-co/Llama-3.3-70B-Instruct-tokenizer",
+        repo_type="model",
+        allow_patterns=["*token*", "config.json"],
+    )
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_llama_1b_model_fixture():
     # download the tokenizer only
@@ -315,6 +333,14 @@ def download_llama2_model_fixture():
     )
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_llama32_1b_model_fixture():
+    snapshot_download_w_retry(
+        "osllmai-community/Llama-3.2-1B",
+        repo_type="model",
+    )
+
+
 @pytest.fixture
 @enable_hf_offline
 def tokenizer_huggyllama(
@@ -8,7 +8,7 @@ from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
 from axolotl.utils import get_pytorch_version
-from axolotl.utils.config import normalize_config, prepare_plugins
+from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists
@@ -56,6 +56,7 @@ class TestCutCrossEntropyIntegration:
     # pylint: disable=redefined-outer-name
     def test_llama_w_cce(self, min_cfg, temp_dir):
         cfg = DictDefault(min_cfg)
+        cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
@@ -101,6 +102,7 @@ class TestCutCrossEntropyIntegration:
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
@@ -129,6 +131,7 @@ class TestCutCrossEntropyIntegration:
                 attention_type: True,
             }
         )
+        cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
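This `validate_config` insertion repeats across most of the test diffs below. Note that the tests always reassign the result (`cfg = validate_config(cfg)`), which suggests it returns the validated config rather than mutating in place. A sketch of the setup sequence the updated tests share (config keys borrowed from tests in this compare):

from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
from axolotl.utils.dict import DictDefault

# minimal test-style config; real tests carry many more keys
cfg = DictDefault(
    {
        "base_model": "HuggingFaceTB/SmolLM2-135M",
        "datasets": [{"path": "mhenrichsen/alpaca_2k_test", "type": "alpaca"}],
        "learning_rate": 1e-5,
        "micro_batch_size": 1,
        "gradient_accumulation_steps": 1,
        "num_epochs": 1,
        "output_dir": "./outputs",
    }
)
cfg = validate_config(cfg)  # returns the validated config -- reassign it
prepare_plugins(cfg)        # only needed when the test exercises plugins
normalize_config(cfg)       # normalizes in place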
@@ -5,7 +5,7 @@ Simple end-to-end test for Liger integration
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config, prepare_plugins
+from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
 from axolotl.utils.dict import DictDefault
 
 from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
@@ -54,6 +54,7 @@ class LigerIntegrationTestCase:
             }
         )
         # pylint: disable=duplicate-code
+        cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
@@ -100,6 +101,7 @@ class LigerIntegrationTestCase:
             }
         )
         # pylint: disable=duplicate-code
+        cfg = validate_config(cfg)
         prepare_plugins(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
104 tests/e2e/integrations/test_llm_compressor.py Normal file
@@ -0,0 +1,104 @@
+"""
+E2E smoke tests for LLMCompressorPlugin integration
+"""
+
+from pathlib import Path
+
+import pytest
+
+from axolotl.cli.args import TrainerCliArgs
+from axolotl.common.datasets import load_datasets
+from axolotl.train import train
+from axolotl.utils.config import normalize_config, prepare_plugins, validate_config
+from axolotl.utils.dict import DictDefault
+
+from tests.e2e.utils import check_model_output_exists, require_torch_2_4_1
+
+MODELS = [
+    "nm-testing/llama2.c-stories42M-pruned2.4-compressed",
+    "nm-testing/llama2.c-stories42M-gsm8k-sparse-only-compressed",
+]
+
+
+@pytest.mark.parametrize(
+    "base_model", MODELS, ids=["no-checkpoint-recipe", "with-checkpoint-recipe"]
+)
+@pytest.mark.parametrize(
+    "save_compressed", [True, False], ids=["save_compressed", "save_uncompressed"]
+)
+class TestLLMCompressorIntegration:
+    """
+    e2e tests for axolotl.integrations.llm_compressor.LLMCompressorPlugin
+    """
+
+    @require_torch_2_4_1
+    def test_llmcompressor_plugin(
+        self, temp_dir, base_model: str, save_compressed: bool
+    ):
+        # core cfg
+        cfg = DictDefault(
+            {
+                "base_model": base_model,
+                "plugins": ["axolotl.integrations.llm_compressor.LLMCompressorPlugin"],
+                "sequence_len": 1024,
+                "val_set_size": 0.05,
+                "special_tokens": {"pad_token": "<|endoftext|>"},
+                "datasets": [{"path": "mhenrichsen/alpaca_2k_test", "type": "alpaca"}],
+                "num_epochs": 1,
+                "micro_batch_size": 2,
+                "gradient_accumulation_steps": 2,
+                "output_dir": temp_dir,
+                "learning_rate": 1e-5,
+                "optimizer": "adamw_torch_fused",
+                "lr_scheduler": "cosine",
+                "save_safetensors": True,
+                "bf16": "auto",
+                "max_steps": 5,
+                "llmcompressor": {
+                    "recipe": {
+                        "finetuning_stage": {
+                            "finetuning_modifiers": {
+                                "ConstantPruningModifier": {
+                                    "targets": [
+                                        "re:.*q_proj.weight",
+                                        "re:.*k_proj.weight",
+                                        "re:.*v_proj.weight",
+                                        "re:.*o_proj.weight",
+                                        "re:.*gate_proj.weight",
+                                        "re:.*up_proj.weight",
+                                        "re:.*down_proj.weight",
+                                    ],
+                                    "start": 0,
+                                },
+                            },
+                        },
+                    },
+                    "save_compressed": save_compressed,
+                },
+            }
+        )
+
+        prepare_plugins(cfg)
+        cfg = validate_config(cfg)
+        normalize_config(cfg)
+        cli_args = TrainerCliArgs()
+        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
+
+        train(cfg=cfg, dataset_meta=dataset_meta)
+        check_model_output_exists(temp_dir, cfg)
+        _check_llmcompressor_model_outputs(temp_dir, save_compressed)
+
+
+def _check_llmcompressor_model_outputs(temp_dir, save_compressed):
+    # recipe.yaml should exist
+    assert (Path(temp_dir) / "recipe.yaml").exists()
+
+    # sparsity config exists if save_compressed
+    if save_compressed:
+        from compressed_tensors import ModelCompressor
+        from compressed_tensors.config import Sparse24BitMaskConfig
+
+        compressor = ModelCompressor.from_pretrained(temp_dir)
+        assert compressor is not None
+        assert isinstance(compressor.sparsity_config, Sparse24BitMaskConfig)
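Both test checkpoints are 2:4 ("pruned2.4") sparse models: at most two of every four consecutive weights are nonzero. That is the pattern the `ConstantPruningModifier` recipe keeps frozen during finetuning and what `Sparse24BitMaskConfig` describes on disk. A standalone sketch of that invariant in plain PyTorch (illustration only, not part of the test):

import torch


def is_2_4_sparse(weight: torch.Tensor) -> bool:
    """True if every group of 4 consecutive weights has at most 2 nonzeros."""
    assert weight.numel() % 4 == 0
    groups = weight.reshape(-1, 4)
    return bool((groups.ne(0).sum(dim=1) <= 2).all())


w = torch.tensor([0.5, 0.0, -1.2, 0.0, 0.0, 0.3, 0.0, 0.9])
print(is_2_4_sparse(w))  # True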
0 tests/e2e/multigpu/patched/__init__.py Normal file
@@ -10,7 +10,7 @@ from transformers.testing_utils import get_torch_dist_unique_port
 
 from axolotl.utils.dict import DictDefault
 
-from ..utils import check_tensorboard
+from ...utils import check_tensorboard
 
 os.environ["WANDB_DISABLED"] = "true"
@@ -0,0 +1,2 @@
+# Tests under this directory should get run "solo" on their own as they
+# seem to cause issues when run in the same batch as other tests.
@@ -49,8 +49,9 @@ class TestPackedFlex:
             },
             "datasets": [
                 {
-                    "path": "vicgalle/alpaca-gpt4",
+                    "path": "tatsu-lab/alpaca",
                     "type": "alpaca",
+                    "split": "train[:10%]",
                 },
             ],
             "num_epochs": 1,
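The dataset swaps in this compare all follow the same pattern: `tatsu-lab/alpaca` replaces `vicgalle/alpaca-gpt4`, and the new `split` key uses the Hugging Face datasets slicing syntax to cap the test at a tenth of the data. For reference, the same slice outside axolotl:

from datasets import load_dataset

# "train[:10%]" is standard datasets split-slicing syntax:
# stream only the first 10% of the train split
ds = load_dataset("tatsu-lab/alpaca", split="train[:10%]")
print(len(ds))  # roughly 5.2k rows of the ~52k-example alpaca set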
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -60,6 +60,7 @@ class Test4dMultipackLlama(unittest.TestCase):
                 "fp16": True,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -104,6 +105,7 @@ class Test4dMultipackLlama(unittest.TestCase):
                 "fp16": True,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -63,6 +63,7 @@ class TestFalconPatched(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -103,6 +104,7 @@ class TestFalconPatched(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -12,7 +12,7 @@ from transformers.utils import is_torch_bf16_gpu_available
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -67,6 +67,7 @@ class TestFusedLlama(unittest.TestCase):
             cfg.bf16 = True
         else:
             cfg.fp16 = True
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -11,7 +11,7 @@ import pytest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -65,6 +65,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -105,6 +106,7 @@ class TestLlamaShiftedSparseAttention(unittest.TestCase):
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -12,7 +12,7 @@ from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_availab
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -70,6 +70,7 @@ class TestLoraLlama(unittest.TestCase):
         else:
             cfg.fp16 = True
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -120,6 +121,7 @@ class TestLoraLlama(unittest.TestCase):
                 "lr_scheduler": "cosine",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -63,6 +63,7 @@ class TestMistral(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -104,6 +105,7 @@ class TestMistral(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -60,6 +60,7 @@ class TestMixtral(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -6,7 +6,7 @@ import unittest
 
 import transformers
 
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_model, load_tokenizer
 
@@ -47,6 +47,7 @@ class TestModelPatches(unittest.TestCase):
                 "eval_steps": 10,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         tokenizer = load_tokenizer(cfg)
         load_model(cfg, tokenizer, inference=False)
@@ -79,6 +80,7 @@ class TestModelPatches(unittest.TestCase):
                 "eval_steps": 10,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         tokenizer = load_tokenizer(cfg)
         load_model(cfg, tokenizer, inference=False)
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, with_temp_dir
@@ -63,6 +63,7 @@ class TestPhiMultipack(unittest.TestCase):
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -82,7 +83,7 @@ class TestPhiMultipack(unittest.TestCase):
             "sample_packing": True,
             "flash_attention": True,
             "pad_to_sequence_len": True,
-            "load_in_8bit": False,
+            "load_in_4bit": True,
             "adapter": "qlora",
             "lora_r": 64,
             "lora_alpha": 32,
@@ -114,6 +115,7 @@ class TestPhiMultipack(unittest.TestCase):
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -12,7 +12,7 @@ from transformers.utils import is_torch_bf16_gpu_available
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, most_recent_subdir
@@ -46,8 +46,9 @@ class TestResumeLlama:
             },
             "datasets": [
                 {
-                    "path": "vicgalle/alpaca-gpt4",
+                    "path": "tatsu-lab/alpaca",
                     "type": "alpaca",
+                    "split": "train[:10%]",
                 },
             ],
             "num_epochs": 2,
@@ -67,6 +68,7 @@ class TestResumeLlama:
             cfg.bf16 = True
         else:
             cfg.fp16 = True
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -10,7 +10,7 @@ import pytest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from ..utils import check_model_output_exists, check_tensorboard
@@ -72,6 +72,7 @@ class TestUnslothQLoRA:
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -122,6 +123,7 @@ class TestUnslothQLoRA:
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -177,6 +179,7 @@ class TestUnslothQLoRA:
             }
         )
 
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -41,8 +41,9 @@ class TestPackedFlex(unittest.TestCase):
             },
             "datasets": [
                 {
-                    "path": "vicgalle/alpaca-gpt4",
+                    "path": "tatsu-lab/alpaca",
                     "type": "alpaca",
+                    "split": "train[:10%]",
                 },
             ],
             "num_epochs": 1,
@@ -1,85 +0,0 @@
-"""
-E2E tests for preprocessing
-"""
-
-import logging
-import os
-import unittest
-
-import transformers
-
-from axolotl.cli.args import PreprocessCliArgs
-from axolotl.common.datasets import load_preference_datasets
-from axolotl.utils.config import normalize_config, validate_config
-from axolotl.utils.dict import DictDefault
-
-from ..utils import with_temp_dir
-
-LOG = logging.getLogger("axolotl.tests.e2e")
-os.environ["WANDB_DISABLED"] = "true"
-
-
-class TestCustomRewardFunctionLoading(unittest.TestCase):
-    """
-    Test case for GRPO training using single GPU
-    """
-
-    def _utils_write_rewards(self):
-        # write cfg to yaml file
-        with open("rewards.py", "w", encoding="utf-8") as fout:
-            fout.write(
-                """import random
-def rand_reward_func(completions, **kwargs) -> list[float]:
-    return [random.uniform(0, 1) for _ in completions]
-
-def oai_gsm8k_transform(cfg, *args, **kwargs):
-    def transform_fn(example, tokenizer=None):
-        label = example["answer"].split("####")[-1].strip().replace(",", "")
-        return {
-            "prompt": [{"role": "user", "content": example["question"]},],
-            "answer": label,
-        }
-    return transform_fn, {"remove_columns": ["question"]}
-"""
-            )
-
-    @with_temp_dir
-    def test_custom_rewards_fn_preprocess(self, temp_dir):
-        # pylint: disable=duplicate-code
-        cfg = DictDefault(
-            {
-                "base_model": "HuggingFaceTB/SmolLM2-135M",
-                "strict": False,
-                "rl": "grpo",
-                "trl": {
-                    "beta": 0.001,
-                    "max_completion_length": 256,
-                    "use_vllm": True,
-                    "num_generations": 4,
-                    "reward_funcs": [
-                        "rewards.rand_reward_func"
-                    ],  # format: '{file_name}.{fn_name}'
-                    "reward_weights": [1.0],
-                },
-                "datasets": [
-                    {
-                        "path": "openai/gsm8k",
-                        "name": "main",
-                        "type": "rewards.oai_gsm8k_transform",
-                    },
-                ],
-                "dataset_prepared_path": temp_dir,
-                "gradient_accumulation_steps": 1,
-                "micro_batch_size": 1,
-                "learning_rate": 0.000005,
-            }
-        )
-
-        self._utils_write_rewards()
-
-        cfg = validate_config(cfg)
-        normalize_config(cfg)
-        parser = transformers.HfArgumentParser(PreprocessCliArgs)
-        cli_args, _ = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-
-        load_preference_datasets(cfg=cfg, cli_args=cli_args)
@@ -102,6 +102,7 @@ class TestEmbeddingsLrScale(unittest.TestCase):
                 "use_tensorboard": True,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -109,6 +109,7 @@ class TestLlamaVision(unittest.TestCase):
                 "bf16": True,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -40,8 +40,9 @@ class TestPackedLlama(unittest.TestCase):
             },
             "datasets": [
                 {
-                    "path": "vicgalle/alpaca-gpt4",
+                    "path": "tatsu-lab/alpaca",
                     "type": "alpaca",
+                    "split": "train[:10%]",
                 },
             ],
             "num_epochs": 1,
@@ -79,7 +79,7 @@ class TestPhi(unittest.TestCase):
             "tokenizer_type": "AutoTokenizer",
             "sequence_len": 2048,
             "sample_packing": False,
-            "load_in_8bit": False,
+            "load_in_4bit": True,
             "adapter": "qlora",
             "lora_r": 64,
             "lora_alpha": 32,
@@ -111,6 +111,7 @@ class TestPhi(unittest.TestCase):
                 "bf16": "auto",
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
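A plausible reason for the quantization flag flip here and in the phi multipack hunk above: these tests request a `qlora` adapter, and QLoRA fine-tunes on top of 4-bit quantized base weights, so with `validate_config` now actually run in these tests, `load_in_4bit: True` is the combination the validator would expect, while the old `load_in_8bit: False` was inert. In miniature:

# the qlora-relevant corner of the test config, before and after this change
before = {"adapter": "qlora", "load_in_8bit": False}   # 8-bit flag did nothing
after = {"adapter": "qlora", "load_in_4bit": True}     # QLoRA trains against 4-bit weights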
@@ -9,7 +9,7 @@ import unittest
 from axolotl.cli.args import TrainerCliArgs
 from axolotl.common.datasets import load_datasets
 from axolotl.train import train
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.dict import DictDefault
 
 from .utils import check_model_output_exists, check_tensorboard, with_temp_dir
@@ -57,6 +57,7 @@ class TestProcessRewardSmolLM2(unittest.TestCase):
                 "seed": 42,
             }
         )
+        cfg = validate_config(cfg)
         normalize_config(cfg)
         cli_args = TrainerCliArgs()
         dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
@@ -11,7 +11,7 @@ from unittest.mock import patch
 import pytest
 from datasets import Dataset
 
-from axolotl.utils.config import normalize_config
+from axolotl.utils.config import normalize_config, validate_config
 from axolotl.utils.data import prepare_dataset
 from axolotl.utils.data.rl import load_prepare_preference_datasets
 from axolotl.utils.data.utils import deduplicate_and_log_datasets
@@ -319,6 +319,7 @@ class TestDeduplicateNonRL(unittest.TestCase):
                 "num_epochs": 1,
             }
         )
+        self.cfg_1 = validate_config(self.cfg_1)
         normalize_config(self.cfg_1)
 
     @pytest.mark.skip(reason="TODO: fix hf hub offline to work with HF rate limits")