axolotl

tocmo0nlord/axolotl

Fork 0

Commit Graph

Select branches

Hide Pull Requests

1947fix

1991test

20230920-btlm

20231212-fixes

20240216-updates

20240307-updates

20240404-lisa-determinism

3181

4bit-optimizers

775-option-to-drop-vs-truncate-on-rows-longer-than-context-length

NanoCode012-patch-1

accelerator-args-builder

activation-offloading-torchtune

activations

activeblue/main

async-grpo-patched-v2

attention_enum

attn-implementation-refactor

attn-patches

autodoc

autogptq-tests

axolotl-ci-hf

base-model-readme-update

benchmark-callbacks-next

bursteratom-doc-faq-update

chat-dataset-tool

chat-template-granite

chore/docstring-distributed

cj_tokenizer_default_prompt_template

cli-cloud-modal-math-hard

cli-refactor

codecov-pulls-only

coderabbitai/docstrings/3e51a68

coderabbitai/docstrings/QVUilv72ojQNaYsCLVNpUpfo2rK1ZU5x90oPNXYz0ZfsWzWSHca36pjgaU5JOtZOA4gNjbjVYxShdRmkm7fGSlW

coderabbitai/docstrings/b234532

colab-misc-fixes

colab-misc-fixes-test

completion-json

compute-perplexity-metrics

cp-sdpa

cuda-12.8.1

custom-modeling

custom-trainer-cls

datasets-351

datasets-refactor

debug-hf-home-cache

deepspeed-low-cpu-mem

deepspeed_0_14_4

destroy-pg

dev-base

device-mesh

devstral-support

dft

diff-transformer

diffusion-custom-loss

diffusion-custom-models

diffusion-next-token-trainer

djsaunde-patch-1

docker-base-nvcr-pytorch

docker-cleanup-20231029

docs-lint-20250212

dpo-spawn-fix

dump-config

dynamic-sft

e2e-fsdp-trainer

embeddings-resize

enable_tp

eos-hell

exp-expand-len

fa-261

fa-check

fa3-hopper

feat/beautiful-readme

feat/glm45

feat/glmflash-other

feat/liger-deepseekv3

feat/linearize

feat/lmeval-baseten

feat/phi_35_vision

feat/pref_liger

feat/soap-optim-v2

feat/spaces-ui

feat/torchao-qlora

feat/wizard

feat_hqq

feature/attn-patches

feature/enable-huggingface-dataset-revision

feature/relora-rebased

fix-ddp_find_unused_parameters

fix-l3-lora

fix-merge-lint-issue

fix-preview

fix/cce-linear

fix/cp-waste

fix/diffusion

fix/doc-key

fix/dpo-labels

fix/eval-accu

fix/gemma3-text-only

fix/gemma3n-text-attention

fix/granite-speech

fix/hpc-root

fix/issue-1-build-deps

fix/issue-2-flash-attn-install

fix/issue-3-telemetry-whitelist

fix/issue-4-deepspeed-optional

fix/issue-5-8-docs

fix/issue-6-default-attention

fix/issue-7-hf-token-check

fix/kd-trainer-num-items

fix/merge-lora-fp32

fix/replace_jackllama

fix/rl-trainer-arg

fix/vllm-version

fix/xformers

fix_kto

fixtypo

flan-no-bos

flash-attn-2_5_5

flash-attn-fix-patches-wo-sample-packing

flex_patching_update

flx_attn_support

fp8

fsdp-defaults

fsdp-fft

fsdp-fix

fsdp-qdora

fsdp2

fsdp2_fp32

fused-mlp-ez

gh-pages

grouped_lr_squashed

grpo-path

grpo-path-v2

grpo-ref-model-cleanup

grpo_liger

hamelsmu-patch-1

hf-trainer-refactor

hymba_multipack2

ia3-peft

iterable-optional

jagged-restart-lr-scheduler-v3

kd-fix-20250519-v2

kd-logits-view

kd-logprob-data

kd-trainer

kd-trainer-2

kd-trainer-pre

kd-trainer-rebased

kd-trainer-v2

kd-trainer-zscore

keep_in_memory

kernelize-scattermoe-lora

kto_fix

kwargs-refactor

latent-space

lhl-moe-aux-loss-free

liger-063

liger-065

liger-dpo

lisa

llama-4-examples

llama-4-z3

llama-dropout

llama-flash-attn-fix

llama-multipack

llama4

llama4-patches

llava

llava-train

llmcompressor-sft

llmcompressor-sft-v2

llmcompressor-sft-wing

lora-fsdp2-doc

lora-kernels-deepspeed

lora-kernels-doc-fix

lora-quant-state-offset

lora_bf16

lora_kernels_fsdp

main

main-base

map-dataset-fetcher-fix

maverick-example

merge-lora-on-complete

merge-lora-tests

merged-2554

mistral-support

mixtral_optimized

mixtral_swiglu

mm2

mm3

mm_mc_chat

modal-upgrade-builder

model-loader-refactor

moekernels

mora

multi-gpu-state

multipack

multipack-dpo

multipack-pretraining

muon-validation

nca-pair

nd_parallel

neft-v2

no-bos-tokens-packing

no-seq-len

no-zero-ds-train

offload-activations-disk

olmo-no-position_ids

online-topk-kd

openorca

openorca-fix-mask

openorca-v2

optimizer-checkpoint

optimizer-compile

optimizers-refactor

packing-attn-limit-fa2-rebased

patch_lora_post_model_load

peft-update

phi-moe

pixtral_integration

pre-commit-update

preprocess_grpo-fix

pretrain-dataset

print_venv

pytest-each-flakey

pytest-skip-s2

q-galore

quantize-ptq-cli

quartodoc

quartodoc-fix

rala

rala-v2

reentrant-w-offloading

refactor-flash-attention

relaxed-recursive-transformers

release-0.10.x

release-0.8.x

release-v0.11.x

release-v0.12.x

release-v0.13.x

release-v0.9.x

remove-gptq-warn

revert-2332-fix_sample_packing

revert-2906-checkpoint-on-step-1

revert-multipack-changes

rl-trainers-sp

runpod-sls

sac

sageattention

save_only_model

scatter_moe

scatter_moe_eric

scattermoe-lora-optim-dtypestest

scattermoe-nanotron

sdpa-cp

sdpa-multipack

seq-parallel-ring

sequence-parallelism

shampoo

shampoo-low_bit

shared-prepared-ci

sharegpt-batched

sharegpt-field-conversations

smaller-rand-model

smol-ci

soap-optim

sp-fix-masking

sp-restore-buffers

sp-rl

sp-rl-v3

split-batches-sizes

sppo

squash_position_ids

ssmi-main

stable

streaming

streaming-on-the-fly-preprocess

streaming-remote-dataset

streaming-v2

swe-rebench-rl-rebase

telemetry

telemetry-opt-in

tensor-parallel

tensorboard-loss-check

testingci

textui

tinyllama-example

tool-mpm

topk-logprobs-triton

torch-211-base

torch_tensor_parallel

tp_support

train-refactor

transformers-4511

transformers-4513

transformers-4573

transformers-4_47_0_v2

transformers-fsdp-check

transformers-itl-refactor

tui

unsloth_modules

update-examples-llama3-ez

update-lgpl

update-vllm

upgrade-liger-test

upgrade-torchao-0.15

upgrade-trl-v0.12.0_2

upgrade_liger-tr4.46.1

uv-first

uv-fixup

vendor-moe

version-dev

vllm-0191

wait-distributed-close

weight-scale-norm

xformers-wo-packing

yayi2

zero3-8bit-lora

v0.1.0

v0.10.0

v0.10.1

v0.11.0

v0.11.0.post1

v0.12.0

v0.12.1

v0.12.2

v0.13.0

v0.13.1

v0.13.2

v0.14.0

v0.15.0

v0.16.0

v0.16.1

v0.2.0

v0.2.1

v0.3.0

v0.4.0

v0.5.0

v0.5.1

v0.5.1.post1

v0.5.2

v0.6.0

v0.7.0

v0.7.1

v0.8.0

v0.8.1

v0.9.0

v0.9.1

v0.9.1.post1

v0.9.2

25e037fe2d Support for additional_special_tokens (#1221) [skip ci] DreamGenX 2024-02-01 00:13:13 +01:00
52c83d30bf Update rlhf.md (#1237) [skip ci] Hamel Husain 2024-01-31 17:27:35 -05:00
047d9e1d5b helper utils Wing Lian 2024-01-31 12:49:29 -05:00
88a0c05d2c wip Wing Lian 2024-01-31 12:07:39 -05:00
d113331e9a add a helpful motd for cloud image (#1235) [skip ci] Wing Lian 2024-01-31 10:26:02 -05:00
8f2b591baf set torch version to what is installed during axolotl install (#1234) Wing Lian 2024-01-31 08:47:34 -05:00
5787e1a23f Fix and document test_datasets (#1228) DreamGenX 2024-01-31 12:48:57 +01:00
8608d8003e Fix typo (#1231) [skip ci] xhedit 2024-01-31 06:46:55 -05:00
4cb7900a56 Peft lotfq (#1222) Wing Lian 2024-01-28 18:50:08 -05:00
18f811978c FEAT: add tagging support to axolotl for DPOTrainer (#1209) Filippo Broggini 2024-01-27 02:01:57 +01:00
afb5dd9655 Update FUNDING.yml [skip ci] Wing Lian 2024-01-26 20:00:28 -05:00
8da1633124 Revert "run PR e2e docker CI tests in Modal" (#1220) [skip ci] Wing Lian 2024-01-26 16:50:44 -05:00
36d053f6f0 run PR e2e docker CI tests in Modal (#1217) [skip ci] Wing Lian 2024-01-26 16:13:27 -05:00
af29d81f80 ADD: warning if hub_model_id ist set but not any save strategy (#1202) JohanWork 2024-01-26 16:38:55 +01:00
1b180034c7 ensure the tests use the same version of torch as the latest base docker images (#1215) [skip ci] Wing Lian 2024-01-26 10:38:30 -05:00
62ca4a2b71 Respect sliding_window=None (#1214) DreamGenX 2024-01-26 13:43:37 +01:00
5407ddd233 Update qlora.yml - remove max_packed_sequence_len (#1210) [skip ci] Igor Berlenko 2024-01-26 20:43:05 +08:00
1a538be9c2 add a prelim test for expading the 4d mask sdpa-multipack Wing Lian 2024-01-26 00:41:24 -05:00
34de5b3bd5 extras for the various flash attn subdirs and build those in the base module as it is a slow step flash-attn-fix-patches-wo-sample-packing Wing Lian 2024-01-26 00:40:39 -05:00
74c72ca5eb drop py39 docker images, add py311, upgrade pytorch to 2.1.2 (#1205) Wing Lian 2024-01-26 00:38:49 -05:00
a1d168d314 break out the additional llama patches from the flash attn w multipack patch Wing Lian 2024-01-25 23:06:52 -05:00
e923e62d24 more checks and fixes for deepspeed and fsdp (#1208) [skip ci] Wing Lian 2024-01-25 20:01:45 -05:00
ba944e6554 workaround for transformers bug requireing do_sample for saveing pretrained (#1206) Wing Lian 2024-01-25 11:34:41 -05:00
badda3783b make sure to register the base chatml template even if no system message is provided (#1207) Wing Lian 2024-01-25 10:38:08 -05:00
a01b998c0f Update deps 202401 (#1204) [skip ci] Wing Lian 2024-01-25 10:11:49 -05:00
33e117088f precompute dpo logprobs setting and fixes (#1199) [skip ci] Wing Lian 2024-01-25 09:31:55 -05:00
b4ac96adef fix learning rate scheduler's warnings (#1135) [skip ci] Ricardo Dominguez-Olmedo 2024-01-25 13:09:34 +01:00
98b4762077 Feat/chatml add system message (#1117) mhenrichsen 2024-01-25 08:24:27 +01:00
ee0b5f60e5 add colab example (#1196) [skip ci] JohanWork 2024-01-25 02:09:09 +01:00
08719b9609 fix(log): improve warning to clarify that lora_modules_to_save expect a list (#1197) NanoCode012 2024-01-25 10:08:34 +09:00
1427d5b502 prepare for release 0.4.0 (#1175) v0.4.0 Wing Lian 2024-01-24 15:00:28 -05:00
54d2ac155b Mixtral fixes 20240124 (#1192) [skip ci] Wing Lian 2024-01-24 14:59:57 -05:00
af0243021c Standardize system prompt format for AlpacaPrompter (#1190) [skip ci] Oleh Kuznetsov 2024-01-24 20:27:01 +01:00
8a49309489 upgrade deepspeed to 0.13.1 for mixtral fixes (#1189) [skip ci] Wing Lian 2024-01-24 14:26:40 -05:00
5bce45f800 more dpo fixes for dataset loading and docs (#1185) [skip ci] Wing Lian 2024-01-24 14:23:55 -05:00
d85d4942cf report min lenght of tokenized data (#1186) [skip ci] Wing Lian 2024-01-24 09:17:50 -05:00
02f2c720fc Fix generation_config validation raises Exception for do_merge_lora (#1184) Agung Baptiso Sorlawan 2024-01-24 12:42:15 +07:00
71141deb18 Add support for offline mode with HF_HUB_OFFLINE envvar (#1182) James Wade 2024-01-24 00:41:47 -05:00
dc051b861d Update rlhf.md (#1178) [skip ci] Aleksey Korshuk 2024-01-23 23:54:51 +03:00
59a31fe613 DPO fixes v2 (#1174) Wing Lian 2024-01-23 12:56:24 -05:00
814aee6603 Phi2 multipack (#1173) Wing Lian 2024-01-23 12:54:36 -05:00
b715cd549a update docs [skip ci] (#1176) Wing Lian 2024-01-23 11:14:52 -05:00
fb7f9b9516 don't fail if can't cast weights due to offload when merging (#1172) [skip ci] Wing Lian 2024-01-23 09:17:08 -05:00
cc250391a0 Fine-Tuning Mistral-7b for Real-World Chatbot Applications Using Axolotl (Lora used) (#1155) Tilemachos Chatzipapas 2024-01-23 14:32:21 +02:00
9135b9e2aa Update README.md (#1169) [skip ci] Ayush Singh 2024-01-23 04:25:44 -08:00
7523d1f557 DPO cleanup (#1126) Wing Lian 2024-01-23 00:40:37 -05:00
5439707489 Feat(test): Add tests for alpaca chatml prompt tokenizer (#1088) JohanWork 2024-01-23 05:30:26 +01:00
684038111e Add desc to map/filter (#1162) Casper 2024-01-23 03:30:53 +01:00
cda52dc32b support for explicit test_dataset definition for evals (#786) Wing Lian 2024-01-22 21:29:56 -05:00
e799e08d3c Falcon embeddings (#1149) [skip docker] Wing Lian 2024-01-22 21:01:42 -05:00
0f77b8d798 add commit message option to skip docker image builds in ci (#1168) [skip ci] Wing Lian 2024-01-22 19:55:36 -05:00
32580c1ca7 Vram fix attempt (#1164) [skip ci] Wing Lian 2024-01-22 19:54:54 -05:00
802f9667a2 improve vram use w gradient checkpointing (#1167) [skip ci] Wing Lian 2024-01-22 19:48:22 -05:00
b8e5603467 Add mlflow callback for pushing config to mlflow artifacts (#1125) JohanWork 2024-01-23 00:44:39 +01:00
782b6a4216 set fp16 to false if bf16, update bf16: auto in example YAMLs (#1122) [skip ci] Wing Lian 2024-01-22 18:44:01 -05:00
eaaeefce55 jupyter lab fixes (#1139) [skip ci] Wing Lian 2024-01-22 18:42:40 -05:00
f5a828aa20 Qwen2 (#1166) Wing Lian 2024-01-22 18:24:15 -05:00
fccb542b47 make sure the model config loader respects the model_revision too (#1160) [skip-ci] Wing Lian 2024-01-22 13:23:14 -05:00
2ce5c0d68a Deprecate max packed sequence len (#1141) Wing Lian 2024-01-20 05:11:50 -05:00
3db5f2fd17 feat(dataset): add config to keep processed dataset in memory (#1152) NanoCode012 2024-01-20 13:19:28 +09:00
cbecf3e62a fix check for env var (#1151) Wing Lian 2024-01-18 23:58:11 -05:00
729740df81 Dockerfile cloud ports (#1148) Wing Lian 2024-01-18 22:04:25 -05:00
08b8ba09a5 Fix link for Minotaur model (#1146) [skip-ci] Joe Cummings 2024-01-18 17:22:04 -05:00
6910e6a8ca Multipack simplify for Mixtral (#1142) Wing Lian 2024-01-18 16:23:49 -05:00
1d70f24b50 Add shifted sparse attention (#973) [skip-ci] Joe Cummings 2024-01-18 10:16:07 -05:00
317fa2555a fix bf16 check when preprocessing data (#1140) Wing Lian 2024-01-17 22:41:23 -05:00
1e56b88cde fix(preprocess): Make sure dataset not loaded from cache when using preprocess cli (#1136) NanoCode012 2024-01-18 03:03:52 +09:00
7570446596 Preprocess dataset size fix (#1131) Wing Lian 2024-01-17 11:02:41 -05:00
1b33588f09 use low_cpu_mem_usage with ds zero 1 or 2 deepspeed-low-cpu-mem Wing Lian 2024-01-16 19:33:44 -05:00
1b59a3e698 use low_cpu_mem_usage when using deepspeed Wing Lian 2024-01-16 07:44:35 -05:00
ece0211996 Agnostic cloud gpu docker image and Jupyter lab (#1097) Wing Lian 2024-01-15 22:37:54 -05:00
8487b97cf3 Add layers_to_transform for lora_config (#1118) xzuyn 2024-01-15 21:29:55 -05:00
9cd27b2f91 fix(readme): clarify custom user prompt [no-ci] (#1124) NanoCode012 2024-01-16 09:47:33 +09:00
eea6e8303a Disable datasets caching when preparing dataset for packing keep_in_memory Casper 2024-01-15 23:48:24 +01:00
c1b741d9fb pin model_revision for phi2 (#1123) Wing Lian 2024-01-14 17:31:51 -05:00
0abf4d6504 update PR template so we can capture twitter or discord handles (#1121) [skip ci] Wing Lian 2024-01-14 16:19:01 -05:00
086561326f Enable or disable bf16 support based on availability (#1116) Simon Hällqvist 2024-01-14 18:06:56 +01:00
2202a20f60 Reverse caching PR (#1115) Casper 2024-01-13 16:17:40 +01:00
d66b10141e Disable caching on --disable_caching in CLI (#1110) Casper 2024-01-13 10:13:35 +01:00
304ea1b814 Update debugging.md (#1111) Hamel Husain 2024-01-12 21:07:31 -08:00
da97285e63 keep gate in fp32 for 16 bit loras (#1105) Wing Lian 2024-01-12 14:58:21 -05:00
2dc431078c Add link on README to Docker Debugging (#1107) Hamel Husain 2024-01-12 05:51:35 -08:00
6d342b52a4 Add section for debugging with Docker (#1104) Hamel Husain 2024-01-11 18:43:33 -08:00
b502392e82 Update README.md (#1103) Hamel Husain 2024-01-11 16:41:58 -08:00
44ba616da2 Fix broken pypi.yml (#1099) [skip ci] Mark Saroufim 2024-01-11 12:35:31 -05:00
b432889256 feat: enable trl's autounwrap (#1060) NanoCode012 2024-01-11 22:43:41 +09:00
54fe07a905 Fix debugging.md (#1091) Hamel Husain 2024-01-10 21:44:40 -08:00
7512c3ad20 Add Debugging Guide (#1089) Hamel Husain 2024-01-10 20:49:24 -08:00
78c5b1979e add gptneox embeddings, fix phi2 inputs, also fix the casting (#1083) Wing Lian 2024-01-10 22:32:43 -05:00
23495a80af misc fixes from #943 (#1086) [skip ci] Wing Lian 2024-01-10 22:31:36 -05:00
91502b98d4 Remove fused-dense-lib from requirements.txt (#1087) Casper 2024-01-10 21:26:41 +01:00
6c19e9302a add python 3.11 to the matrix for unit tests (#1085) [skip ci] Wing Lian 2024-01-10 13:02:01 -05:00
90036ebbc6 optimize calculation of cu_seqlens from position_ids (#1084) [skip ci] Wing Lian 2024-01-10 11:54:50 -05:00
9032e610b1 use tags again for test image, only run docker e2e after pre-commit checks (#1081) Wing Lian 2024-01-10 09:04:56 -05:00
d69ba2b0b7 fix: warn user to install mamba_ssm package (#1019) NanoCode012 2024-01-10 16:50:56 +09:00
9e3f0cb5a7 pin accelerate for deepspeed fix (#1080) Wing Lian 2024-01-10 00:50:04 -05:00
2f2582e6ed additional logging to get maximum token length of a sequence in the dataset (#1066) [skip ci] Wing Lian 2024-01-10 00:49:31 -05:00
0ce1a6594e update sharegpt conversations when chatml chat template is set (#1075) [skip ci] Wing Lian 2024-01-10 00:49:07 -05:00
043c3860cd fix: train_on_inputs: true ignored for sharegpt (#1045) [skip ci] NanoCode012 2024-01-10 13:00:09 +09:00
0f100800e3 be more robust about checking embedding modules for lora finetunes (#1074) [skip ci] Wing Lian 2024-01-09 22:58:54 -05:00

... 42 43 44 45 46 ...