axolotl

tocmo0nlord/axolotl

Fork 0

Commit Graph

Select branches

Hide Pull Requests

1947fix

1991test

20230920-btlm

20231212-fixes

20240216-updates

20240307-updates

20240404-lisa-determinism

3181

4bit-optimizers

775-option-to-drop-vs-truncate-on-rows-longer-than-context-length

NanoCode012-patch-1

accelerator-args-builder

activation-offloading-torchtune

activations

activeblue/main

async-grpo-patched-v2

attention_enum

attn-implementation-refactor

attn-patches

autodoc

autogptq-tests

axolotl-ci-hf

base-model-readme-update

benchmark-callbacks-next

bursteratom-doc-faq-update

chat-dataset-tool

chat-template-granite

chore/docstring-distributed

cj_tokenizer_default_prompt_template

cli-cloud-modal-math-hard

cli-refactor

codecov-pulls-only

coderabbitai/docstrings/3e51a68

coderabbitai/docstrings/QVUilv72ojQNaYsCLVNpUpfo2rK1ZU5x90oPNXYz0ZfsWzWSHca36pjgaU5JOtZOA4gNjbjVYxShdRmkm7fGSlW

coderabbitai/docstrings/b234532

colab-misc-fixes

colab-misc-fixes-test

completion-json

compute-perplexity-metrics

cp-sdpa

cuda-12.8.1

custom-modeling

custom-trainer-cls

datasets-351

datasets-refactor

debug-hf-home-cache

deepspeed-low-cpu-mem

deepspeed_0_14_4

destroy-pg

dev-base

device-mesh

devstral-support

dft

diff-transformer

diffusion-custom-loss

diffusion-custom-models

diffusion-next-token-trainer

djsaunde-patch-1

docker-base-nvcr-pytorch

docker-cleanup-20231029

docs-lint-20250212

dpo-spawn-fix

dump-config

dynamic-sft

e2e-fsdp-trainer

embeddings-resize

enable_tp

eos-hell

exp-expand-len

fa-261

fa-check

fa3-hopper

feat/beautiful-readme

feat/glm45

feat/glmflash-other

feat/liger-deepseekv3

feat/linearize

feat/lmeval-baseten

feat/phi_35_vision

feat/pref_liger

feat/soap-optim-v2

feat/spaces-ui

feat/torchao-qlora

feat/wizard

feat_hqq

feature/attn-patches

feature/enable-huggingface-dataset-revision

feature/relora-rebased

fix-ddp_find_unused_parameters

fix-l3-lora

fix-merge-lint-issue

fix-preview

fix/cce-linear

fix/cp-waste

fix/diffusion

fix/doc-key

fix/dpo-labels

fix/eval-accu

fix/gemma3-text-only

fix/gemma3n-text-attention

fix/granite-speech

fix/hpc-root

fix/issue-1-build-deps

fix/issue-2-flash-attn-install

fix/issue-3-telemetry-whitelist

fix/issue-4-deepspeed-optional

fix/issue-5-8-docs

fix/issue-6-default-attention

fix/issue-7-hf-token-check

fix/kd-trainer-num-items

fix/merge-lora-fp32

fix/replace_jackllama

fix/rl-trainer-arg

fix/vllm-version

fix/xformers

fix_kto

fixtypo

flan-no-bos

flash-attn-2_5_5

flash-attn-fix-patches-wo-sample-packing

flex_patching_update

flx_attn_support

fp8

fsdp-defaults

fsdp-fft

fsdp-fix

fsdp-qdora

fsdp2

fsdp2_fp32

fused-mlp-ez

gh-pages

grouped_lr_squashed

grpo-path

grpo-path-v2

grpo-ref-model-cleanup

grpo_liger

hamelsmu-patch-1

hf-trainer-refactor

hymba_multipack2

ia3-peft

iterable-optional

jagged-restart-lr-scheduler-v3

kd-fix-20250519-v2

kd-logits-view

kd-logprob-data

kd-trainer

kd-trainer-2

kd-trainer-pre

kd-trainer-rebased

kd-trainer-v2

kd-trainer-zscore

keep_in_memory

kernelize-scattermoe-lora

kto_fix

kwargs-refactor

latent-space

lhl-moe-aux-loss-free

liger-063

liger-065

liger-dpo

lisa

llama-4-examples

llama-4-z3

llama-dropout

llama-flash-attn-fix

llama-multipack

llama4

llama4-patches

llava

llava-train

llmcompressor-sft

llmcompressor-sft-v2

llmcompressor-sft-wing

lora-fsdp2-doc

lora-kernels-deepspeed

lora-kernels-doc-fix

lora-quant-state-offset

lora_bf16

lora_kernels_fsdp

main

main-base

map-dataset-fetcher-fix

maverick-example

merge-lora-on-complete

merge-lora-tests

merged-2554

mistral-support

mixtral_optimized

mixtral_swiglu

mm2

mm3

mm_mc_chat

modal-upgrade-builder

model-loader-refactor

moekernels

mora

multi-gpu-state

multipack

multipack-dpo

multipack-pretraining

muon-validation

nca-pair

nd_parallel

neft-v2

no-bos-tokens-packing

no-seq-len

no-zero-ds-train

offload-activations-disk

olmo-no-position_ids

online-topk-kd

openorca

openorca-fix-mask

openorca-v2

optimizer-checkpoint

optimizer-compile

optimizers-refactor

packing-attn-limit-fa2-rebased

patch_lora_post_model_load

peft-update

phi-moe

pixtral_integration

pre-commit-update

preprocess_grpo-fix

pretrain-dataset

print_venv

pytest-each-flakey

pytest-skip-s2

q-galore

quantize-ptq-cli

quartodoc

quartodoc-fix

rala

rala-v2

reentrant-w-offloading

refactor-flash-attention

relaxed-recursive-transformers

release-0.10.x

release-0.8.x

release-v0.11.x

release-v0.12.x

release-v0.13.x

release-v0.9.x

remove-gptq-warn

revert-2332-fix_sample_packing

revert-2906-checkpoint-on-step-1

revert-multipack-changes

rl-trainers-sp

runpod-sls

sac

sageattention

save_only_model

scatter_moe

scatter_moe_eric

scattermoe-lora-optim-dtypestest

scattermoe-nanotron

sdpa-cp

sdpa-multipack

seq-parallel-ring

sequence-parallelism

shampoo

shampoo-low_bit

shared-prepared-ci

sharegpt-batched

sharegpt-field-conversations

smaller-rand-model

smol-ci

soap-optim

sp-fix-masking

sp-restore-buffers

sp-rl

sp-rl-v3

split-batches-sizes

sppo

squash_position_ids

ssmi-main

stable

streaming

streaming-on-the-fly-preprocess

streaming-remote-dataset

streaming-v2

swe-rebench-rl-rebase

telemetry

telemetry-opt-in

tensor-parallel

tensorboard-loss-check

testingci

textui

tinyllama-example

tool-mpm

topk-logprobs-triton

torch-211-base

torch_tensor_parallel

tp_support

train-refactor

transformers-4511

transformers-4513

transformers-4573

transformers-4_47_0_v2

transformers-fsdp-check

transformers-itl-refactor

tui

unsloth_modules

update-examples-llama3-ez

update-lgpl

update-vllm

upgrade-liger-test

upgrade-torchao-0.15

upgrade-trl-v0.12.0_2

upgrade_liger-tr4.46.1

uv-first

uv-fixup

vendor-moe

version-dev

vllm-0191

wait-distributed-close

weight-scale-norm

xformers-wo-packing

yayi2

zero3-8bit-lora

v0.1.0

v0.10.0

v0.10.1

v0.11.0

v0.11.0.post1

v0.12.0

v0.12.1

v0.12.2

v0.13.0

v0.13.1

v0.13.2

v0.14.0

v0.15.0

v0.16.0

v0.16.1

v0.2.0

v0.2.1

v0.3.0

v0.4.0

v0.5.0

v0.5.1

v0.5.1.post1

v0.5.2

v0.6.0

v0.7.0

v0.7.1

v0.8.0

v0.8.1

v0.9.0

v0.9.1

v0.9.1.post1

v0.9.2

301cc4c006 implement post training Eric Hartford 2024-03-15 13:16:06 -07:00
035e680631 Update test Casper Hansen 2024-03-15 13:58:12 +00:00
26fc10df01 Refactor names, bugfixes Casper Hansen 2024-03-15 12:39:11 +00:00
1bc008e901 Refactor creating FusedExperts Casper Hansen 2024-03-15 11:59:56 +00:00
3f7ed6a784 Bugfixes, test green Casper Hansen 2024-03-15 11:48:46 +00:00
feea977923 initial implementation, untested Casper 2024-03-15 11:54:36 +01:00
34eb4e1677 fix handling of ddp_find_unused_parameters fix-ddp_find_unused_parameters Wing Lian 2024-03-14 17:45:42 -04:00
8c171aadb4 drop unused padding_mask in llama patch llama-flash-attn-fix Wing Lian 2024-03-14 17:26:30 -04:00
8df7b888ff beta support for multipack with gemmoe: (#1402) Wing Lian 2024-03-14 15:52:23 -04:00
6366b0c212 Fix Gemma 7b qlora.yml (#1405) Sebastian Raschka 2024-03-14 14:44:38 -05:00
05bcc9ea56 Train parameters exclusively in specific ranges (#1390) Seungduk Kim 2024-03-15 00:05:42 +09:00
3bd8203c35 Don't disable existing loggers when configuring axolotl logging (#1395) Chirag Jain 2024-03-14 20:35:21 +05:30
8b12468230 Add QLoRA + FSDP Docs (#1403) Hamel Husain 2024-03-14 08:04:51 -07:00
0976781e15 Update ChatTemplate enum to include alpaca and gemma (#1396) Chirag Jain 2024-03-13 20:36:02 +05:30
8a82d2e0a4 add handling for argilla dpo-mix (#1397) Wing Lian 2024-03-12 17:17:10 -04:00
4326520829 chore: lint (#1389) Wing Lian 2024-03-10 21:02:55 -04:00
b7d8a7dc4d Add Glaive conversation format support (#1365) Brian Fitzgerald 2024-03-10 19:50:25 -05:00
b0ee9ec734 Set gradient_clipping to auto in DeepSpeed configs (#1382) [skip ci] Seungduk Kim 2024-03-11 09:50:12 +09:00
0bc114d2e1 Fix pydantic configuration for the max_memory input (#1385) [skip ci] David Baker 2024-03-11 00:50:04 +00:00
7659c001aa support for rslora (#1387) [skip ci] Wing Lian 2024-03-10 20:49:45 -04:00
3fd8093717 validation for fsdp and deepspeed (#1388) [skip ci] Wing Lian 2024-03-10 20:49:25 -04:00
9b6ee83a73 FDSP + QLoRA (#1378) Wing Lian 2024-03-08 14:31:01 -05:00
b7fe46579d make the conversations/messages field configurable for sharegpt sharegpt-field-conversations Wing Lian 2024-03-08 08:08:29 -05:00
638c2dafb5 JarvisLabs (#1372) Wing Lian 2024-03-07 10:47:32 -05:00
3b432346e3 WIP 20240307-updates Wing Lian 2024-03-07 08:30:13 -05:00
58b0d4b0d8 update flash attention for gemma support: (#1368) Wing Lian 2024-03-06 10:08:54 -05:00
ed70a08348 add docs for input_output format (#1367) [skip ci] Hamel Husain 2024-03-06 06:09:49 -08:00
0cfdb2c90c support for DoRA w/ PEFT (#1363) Wing Lian 2024-03-05 21:20:15 -05:00
37657473c8 Remove unsupported python version 3.9 from README (#1364) [skip ci] Nicolas Rojas 2024-03-05 21:19:36 -05:00
e0f1895408 add starcoder2 (#1349) Eric Hartford 2024-03-05 16:49:17 -08:00
8984bf1722 Update tinyllama lora.yml to fix eval packing issue (#1362) Sebastian Raschka 2024-03-05 13:36:29 -06:00
2598c9f045 allow the sharegpt handler to also better handle datasets destined for openai finetuning (#1361) Wing Lian 2024-03-05 11:43:33 -05:00
decb66e170 lora+ support (#1352) Wing Lian 2024-03-05 07:29:23 -05:00
4d09b42ee3 plain input/output prompt strategy w/o chat templates (#1346) Wing Lian 2024-03-04 16:25:16 -05:00
b5b44925ec Fix validation for early stopping (#1358) Chirag Jain 2024-03-04 08:45:18 +05:30
170d4d7092 chore: enable sample_packing for Gemma (#1351) NanoCode012 2024-03-02 11:56:22 +09:00
00018629e7 run tests again on Modal (#1289) [skip ci] Wing Lian 2024-02-29 14:26:26 -05:00
6b3b271925 fix for protected model_ namespace w pydantic (#1345) Wing Lian 2024-02-28 15:07:49 -05:00
3a5a2d2f34 Fix use_mlflow to be bool instead of str (#1344) Chirag Jain 2024-02-28 23:28:29 +05:30
6d4bbb877f deprecate py 3.9 support, set min pytorch version (#1343) [skip ci] Wing Lian 2024-02-28 12:58:05 -05:00
0f985e12fe more fixes 20240228 (#1342) [skip ci] Wing Lian 2024-02-28 12:57:45 -05:00
c1a7b3dd69 add gemma instruct chat template (#1341) Wing Lian 2024-02-27 17:20:01 -05:00
2b9687f341 Update fastchat_conversation_turns.py (#1294) [skip ci] Ikko Eltociear Ashimine 2024-02-27 23:06:10 +09:00
2c9c88b32a fix steps check for anneal on first cycle (#1316) Wing Lian 2024-02-27 08:56:08 -05:00
5265cd6b2c Update debugging.md (#1339) [skip ci] Hamel Husain 2024-02-26 22:47:31 -08:00
5be8b555a0 fix: checkpoint saving with deepspeed (#1321) NanoCode012 2024-02-27 15:46:44 +09:00
0f6af36d50 Mps mistral lora (#1292) [skip ci] Maxime 2024-02-27 04:39:57 +01:00
3f69571943 more pydantic fixes (#1338) Wing Lian 2024-02-26 22:39:13 -05:00
1e3d5305d3 Support user-defined prompt processing strategies for dpo (#1248) nopperl 2024-02-26 23:49:34 +00:00
16482796b0 add lion-pytorch optimizer (#1299) [skip ci] Maxime 2024-02-27 00:45:14 +01:00
f30d062b48 Add StableLM 2 Example Scripts (#1327) [skip ci] Nathan Cooper 2024-02-26 18:44:25 -05:00
269c5436ea hotfix to exclude_unset from pydantic config when converting back to a dict (#1334) Wing Lian 2024-02-26 15:06:25 -05:00
e7eed203d8 hotfix for missing outputs params (#1333) Wing Lian 2024-02-26 14:36:37 -05:00
cf002312e0 hotfix for lora rank (#1332) Wing Lian 2024-02-26 14:28:43 -05:00
7de912e097 hotfix for capabilities loading (#1331) Wing Lian 2024-02-26 14:24:28 -05:00
d75653407c ADD: push checkpoints to mlflow artifact registry (#1295) [skip ci] JohanWork 2024-02-26 19:32:39 +01:00
c6b01e0f4a chore: update readme to be more clear (#1326) [skip ci] NanoCode012 2024-02-27 03:32:13 +09:00
cc3cebfa70 Pydantic 2.x cfg (#1239) Wing Lian 2024-02-26 12:24:14 -05:00
5894f0e57e make mlflow optional (#1317) Wing Lian 2024-02-26 11:41:33 -05:00
5cf226e177 Use yaml codeblock for config.yaml field (#1303) [skip ci] kallewoof 2024-02-24 21:59:16 +09:00
2ed52bd568 fix(readme): Clarify doc for tokenizer_config (#1323) [skip ci] NanoCode012 2024-02-24 21:55:04 +09:00
718a8f4153 update flash attention to 2.5.5 for gemma flash-attn-2_5_5 Wing Lian 2024-02-21 23:32:44 -05:00
a359579371 deprecate: pytorch 2.0.1 image (#1315) [skip ci] NanoCode012 2024-02-22 11:39:47 +09:00
2752d5f958 multipack for gemma (#1313) Wing Lian 2024-02-21 19:24:21 -05:00
9e300aca0c Adding Google's gemma Model (#1312) Monk 2024-02-21 23:26:47 +05:30
3d2cd804ae fix(readme): update inference md link (#1311) [skip ci] NanoCode012 2024-02-22 02:48:06 +09:00
6ab69ec5f8 Add instructions for playing with qlora model to colab example (#1290) Jared Palmer 2024-02-21 09:46:27 -08:00
3c00f406d6 Allow load_best_model_at_end to be configured for early stopping on custom evaluation datasets (#1291) David Meikle 2024-02-21 15:57:18 +00:00
a7a9a1433a fix(examples): remove is_*_derived as it's parsed automatically (#1297) NanoCode012 2024-02-22 00:52:46 +09:00
e2786cce6a Validation always happens on first step (#1300) Leonardo Emili 2024-02-21 16:52:24 +01:00
d465b9fd98 wip, jagged restarts 20240216-updates Wing Lian 2024-02-16 14:34:08 -05:00
5a5d47458d Add seq2seq eval benchmark callback (#1274) Leonardo Emili 2024-02-13 17:24:30 +01:00
8430db22e2 Scheduler implementation of Continual Pre-Training of Large Language Models: How to (re)warm your model? (#1273) 김진원 2024-02-13 14:23:28 +09:00
4b997c3e1a allow the optimizer prune ratio for ReLoRA to be configurable (#1287) Wing Lian 2024-02-12 11:39:51 -08:00
e08df47584 wip load remote data from postgres streaming-remote-dataset Wing Lian 2024-02-12 09:55:24 -05:00
fac2d98c26 Add MPS support (#1264) Maxime 2024-02-12 14:30:32 +01:00
ea00dd0852 don't use load and push together (#1284) Wing Lian 2024-02-09 14:54:31 -05:00
b2a4cb4396 Update README.md (#1281) Hamel Husain 2024-02-09 07:38:08 -08:00
aaf54dc730 run the docker image builds and push on gh action gpu runners (#1218) Wing Lian 2024-02-09 10:32:54 -05:00
9bca7db133 add support for https remote yamls (#1277) Hamel Husain 2024-02-08 20:02:17 -08:00
91cf4ee72c allow remote data paths (#1278) Hamel Husain 2024-02-08 15:02:35 -08:00
39ad38a1fb update address and port for spaces feat/spaces-ui Wing Lian 2024-02-08 17:55:44 -05:00
1daecd161e copy edits (#1276) Wing Lian 2024-02-08 09:00:04 -05:00
4a654b331e Add link to axolotl cloud image on latitude (#1275) Wing Lian 2024-02-08 08:50:11 -05:00
ddb60883f5 create config Mads Henrichsen 2024-02-08 09:26:58 +01:00
a5724ef08d axolotl start training Mads Henrichsen 2024-02-07 18:16:21 +01:00
5698943263 simplify haldning for newer multipack patches so they can be added in a single place (#1270) Wing Lian 2024-02-07 10:46:04 -05:00
411293bdca contributor avatars (#1269) Wing Lian 2024-02-07 10:09:01 -05:00
190930b5df spaces ui Mads Henrichsen 2024-02-07 15:52:30 +01:00
73f1bdaa15 Fix bug preventing model_kwargs being injected (#1262) Zac Brannelly 2024-02-08 01:38:35 +11:00
1c7ed26785 lock pytorch (#1247) [skip ci] JohanWork 2024-02-06 13:48:26 +01:00
13eea21f9b Add more save strategies for DPO training. (#1255) Philip May 2024-02-06 06:38:43 +01:00
1072f28874 Fix typo bloat16 -> bfloat16 (#1257) Chirag Jain 2024-02-06 11:08:14 +05:30
c7cf3810bd Pretrain transforms (#1261) Wing Lian 2024-02-06 00:37:03 -05:00
8c2e05ade3 relora: magnitude pruning of the optimizer (#1245) Wing Lian 2024-02-06 00:35:30 -05:00
2d65f470d5 fix(model): apply gate fp32 only for mixtral (#1241) NanoCode012 2024-02-02 03:55:05 +09:00
dfd188502a add contact info for dedicated support for axolotl [skip ci] (#1243) Wing Lian 2024-02-01 12:59:07 -05:00
00568c1539 support for true batches with multipack (#1230) Wing Lian 2024-02-01 10:18:42 -05:00
d46d7dfe30 wip multipack-dpo Wing Lian 2024-02-01 00:28:16 -05:00
c67fb71583 Peft deepspeed resume (#1227) Wing Lian 2024-01-31 18:13:29 -05:00

... 41 42 43 44 45 ...