axolotl

tocmo0nlord/axolotl

Fork 0

Commit Graph

Select branches

Hide Pull Requests

1947fix

1991test

20230920-btlm

20231212-fixes

20240216-updates

20240307-updates

20240404-lisa-determinism

3181

4bit-optimizers

775-option-to-drop-vs-truncate-on-rows-longer-than-context-length

NanoCode012-patch-1

accelerator-args-builder

activation-offloading-torchtune

activations

activeblue/main

async-grpo-patched-v2

attention_enum

attn-implementation-refactor

attn-patches

autodoc

autogptq-tests

axolotl-ci-hf

base-model-readme-update

benchmark-callbacks-next

bursteratom-doc-faq-update

chat-dataset-tool

chat-template-granite

chore/docstring-distributed

cj_tokenizer_default_prompt_template

cli-cloud-modal-math-hard

cli-refactor

codecov-pulls-only

coderabbitai/docstrings/3e51a68

coderabbitai/docstrings/QVUilv72ojQNaYsCLVNpUpfo2rK1ZU5x90oPNXYz0ZfsWzWSHca36pjgaU5JOtZOA4gNjbjVYxShdRmkm7fGSlW

coderabbitai/docstrings/b234532

colab-misc-fixes

colab-misc-fixes-test

completion-json

compute-perplexity-metrics

cp-sdpa

cuda-12.8.1

custom-modeling

custom-trainer-cls

datasets-351

datasets-refactor

debug-hf-home-cache

deepspeed-low-cpu-mem

deepspeed_0_14_4

destroy-pg

dev-base

device-mesh

devstral-support

dft

diff-transformer

diffusion-custom-loss

diffusion-custom-models

diffusion-next-token-trainer

djsaunde-patch-1

docker-base-nvcr-pytorch

docker-cleanup-20231029

docs-lint-20250212

dpo-spawn-fix

dump-config

dynamic-sft

e2e-fsdp-trainer

embeddings-resize

enable_tp

eos-hell

exp-expand-len

fa-261

fa-check

fa3-hopper

feat/beautiful-readme

feat/glm45

feat/glmflash-other

feat/liger-deepseekv3

feat/linearize

feat/lmeval-baseten

feat/phi_35_vision

feat/pref_liger

feat/soap-optim-v2

feat/spaces-ui

feat/torchao-qlora

feat/wizard

feat_hqq

feature/attn-patches

feature/enable-huggingface-dataset-revision

feature/relora-rebased

fix-ddp_find_unused_parameters

fix-l3-lora

fix-merge-lint-issue

fix-preview

fix/cce-linear

fix/cp-waste

fix/diffusion

fix/doc-key

fix/dpo-labels

fix/eval-accu

fix/gemma3-text-only

fix/gemma3n-text-attention

fix/granite-speech

fix/hpc-root

fix/issue-1-build-deps

fix/issue-2-flash-attn-install

fix/issue-3-telemetry-whitelist

fix/issue-4-deepspeed-optional

fix/issue-5-8-docs

fix/issue-6-default-attention

fix/issue-7-hf-token-check

fix/kd-trainer-num-items

fix/merge-lora-fp32

fix/replace_jackllama

fix/rl-trainer-arg

fix/vllm-version

fix/xformers

fix_kto

fixtypo

flan-no-bos

flash-attn-2_5_5

flash-attn-fix-patches-wo-sample-packing

flex_patching_update

flx_attn_support

fp8

fsdp-defaults

fsdp-fft

fsdp-fix

fsdp-qdora

fsdp2

fsdp2_fp32

fused-mlp-ez

gh-pages

grouped_lr_squashed

grpo-path

grpo-path-v2

grpo-ref-model-cleanup

grpo_liger

hamelsmu-patch-1

hf-trainer-refactor

hymba_multipack2

ia3-peft

iterable-optional

jagged-restart-lr-scheduler-v3

kd-fix-20250519-v2

kd-logits-view

kd-logprob-data

kd-trainer

kd-trainer-2

kd-trainer-pre

kd-trainer-rebased

kd-trainer-v2

kd-trainer-zscore

keep_in_memory

kernelize-scattermoe-lora

kto_fix

kwargs-refactor

latent-space

lhl-moe-aux-loss-free

liger-063

liger-065

liger-dpo

lisa

llama-4-examples

llama-4-z3

llama-dropout

llama-flash-attn-fix

llama-multipack

llama4

llama4-patches

llava

llava-train

llmcompressor-sft

llmcompressor-sft-v2

llmcompressor-sft-wing

lora-fsdp2-doc

lora-kernels-deepspeed

lora-kernels-doc-fix

lora-quant-state-offset

lora_bf16

lora_kernels_fsdp

main

main-base

map-dataset-fetcher-fix

maverick-example

merge-lora-on-complete

merge-lora-tests

merged-2554

mistral-support

mixtral_optimized

mixtral_swiglu

mm2

mm3

mm_mc_chat

modal-upgrade-builder

model-loader-refactor

moekernels

mora

multi-gpu-state

multipack

multipack-dpo

multipack-pretraining

muon-validation

nca-pair

nd_parallel

neft-v2

no-bos-tokens-packing

no-seq-len

no-zero-ds-train

offload-activations-disk

olmo-no-position_ids

online-topk-kd

openorca

openorca-fix-mask

openorca-v2

optimizer-checkpoint

optimizer-compile

optimizers-refactor

packing-attn-limit-fa2-rebased

patch_lora_post_model_load

peft-update

phi-moe

pixtral_integration

pre-commit-update

preprocess_grpo-fix

pretrain-dataset

print_venv

pytest-each-flakey

pytest-skip-s2

q-galore

quantize-ptq-cli

quartodoc

quartodoc-fix

rala

rala-v2

reentrant-w-offloading

refactor-flash-attention

relaxed-recursive-transformers

release-0.10.x

release-0.8.x

release-v0.11.x

release-v0.12.x

release-v0.13.x

release-v0.9.x

remove-gptq-warn

revert-2332-fix_sample_packing

revert-2906-checkpoint-on-step-1

revert-multipack-changes

rl-trainers-sp

runpod-sls

sac

sageattention

save_only_model

scatter_moe

scatter_moe_eric

scattermoe-lora-optim-dtypestest

scattermoe-nanotron

sdpa-cp

sdpa-multipack

seq-parallel-ring

sequence-parallelism

shampoo

shampoo-low_bit

shared-prepared-ci

sharegpt-batched

sharegpt-field-conversations

smaller-rand-model

smol-ci

soap-optim

sp-fix-masking

sp-restore-buffers

sp-rl

sp-rl-v3

split-batches-sizes

sppo

squash_position_ids

ssmi-main

stable

streaming

streaming-on-the-fly-preprocess

streaming-remote-dataset

streaming-v2

swe-rebench-rl-rebase

telemetry

telemetry-opt-in

tensor-parallel

tensorboard-loss-check

testingci

textui

tinyllama-example

tool-mpm

topk-logprobs-triton

torch-211-base

torch_tensor_parallel

tp_support

train-refactor

transformers-4511

transformers-4513

transformers-4573

transformers-4_47_0_v2

transformers-fsdp-check

transformers-itl-refactor

tui

unsloth_modules

update-examples-llama3-ez

update-lgpl

update-vllm

upgrade-liger-test

upgrade-torchao-0.15

upgrade-trl-v0.12.0_2

upgrade_liger-tr4.46.1

uv-first

uv-fixup

vendor-moe

version-dev

vllm-0191

wait-distributed-close

weight-scale-norm

xformers-wo-packing

yayi2

zero3-8bit-lora

v0.1.0

v0.10.0

v0.10.1

v0.11.0

v0.11.0.post1

v0.12.0

v0.12.1

v0.12.2

v0.13.0

v0.13.1

v0.13.2

v0.14.0

v0.15.0

v0.16.0

v0.16.1

v0.2.0

v0.2.1

v0.3.0

v0.4.0

v0.5.0

v0.5.1

v0.5.1.post1

v0.5.2

v0.6.0

v0.7.0

v0.7.1

v0.8.0

v0.8.1

v0.9.0

v0.9.1

v0.9.1.post1

v0.9.2

34d3c8dcfb [docs] Update README Quickstart to use CLI (#2137) Wing Lian 2024-12-09 14:03:19 -05:00
ab4b32187d need to update deepspeed version in extras too (#2161) [skip ci] Wing Lian 2024-12-09 14:01:44 -05:00
5d6b088997 fix: chat_template masking due to truncation, consolidate turn build and keys within field (#2123) [skip ci] NanoCode012 2024-12-10 01:49:38 +07:00
3862267040 don't add dataset tags if empty due to all local data paths (#2162) [skip ci] Wing Lian 2024-12-09 13:49:18 -05:00
c78de6f214 feat: add kto example (#2158) [skip ci] NanoCode012 2024-12-09 20:17:27 +07:00
b1e8286c57 add missing __init__ to optimizers path (#2160) [skip ci] Wing Lian 2024-12-09 08:17:08 -05:00
57a8c54682 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-09 12:27:34 +00:00
40907c6887 upgrade deepspeed to 0.16.1 (#2157) Wing Lian 2024-12-09 07:25:10 -05:00
6a342feda2 fix: duplicate mlflow logging (#2109) [skip ci] NanoCode012 2024-12-09 19:24:48 +07:00
2de866e92f revert seq len to 8192 Sunny 2024-12-08 22:30:20 -05:00
295e07dcca settings Sunny 2024-12-08 22:22:18 -05:00
95d8c5aa3a Built site for gh-pages Quarto GHA Workflow Runner 2024-12-09 02:10:08 +00:00
0c25bc07a2 use manual version for now (#2156) Wing Lian 2024-12-08 21:09:12 -05:00
7e347cec5e Built site for gh-pages Quarto GHA Workflow Runner 2024-12-08 21:40:12 +00:00
343a4d8855 Fixing issue#2134 Axolotl Crashes At The End Of Training If Base Model Is Local (#2140) Sunny Liu 2024-12-08 16:39:05 -05:00
393853751e add additional fft deepspeed variants (#2153) [skip ci] Wing Lian 2024-12-08 16:38:47 -05:00
39ab9626f1 add transformers module to cleanup e2e-fsdp-trainer Wing Lian 2024-12-08 14:52:54 -05:00
26bd81cec0 re-enable tests w change in patching Wing Lian 2024-12-08 14:52:09 -05:00
d31729dc4e Built site for gh-pages Quarto GHA Workflow Runner 2024-12-08 19:51:36 +00:00
1302e31049 Transformers version flexibility and FSDP optimizer patch (#2155) Wing Lian 2024-12-08 14:50:40 -05:00
5dba7f2868 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-08 03:25:11 +00:00
be5f554a62 bump autoawq to 0.2.7.post3 (#2150) Wing Lian 2024-12-07 22:24:09 -05:00
22319182ab fix for auto_map check when using remote code and multipack for models like deepseek (#2151) [skip ci] Wing Lian 2024-12-07 22:23:52 -05:00
440aab8a6f add --version support to axolotl cli (#2152) [skip ci] Wing Lian 2024-12-07 22:23:33 -05:00
51d722db83 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-07 22:25:38 +00:00
3afc91fba9 run 2.5.1 test without waiting for 1st e2e docker-base-nvcr-pytorch Wing Lian 2024-12-07 08:47:36 -05:00
0689419d25 use pr base tag Wing Lian 2024-12-07 05:05:02 -05:00
e64c32c0bd push test build Wing Lian 2024-12-06 20:15:02 -05:00
ec819dde3b attempt to build the test images Wing Lian 2024-12-06 19:50:08 -05:00
fdf4bb5087 fix default base image Wing Lian 2024-12-06 11:35:40 -05:00
f67d16268c try with default tag Wing Lian 2024-12-06 11:10:49 -05:00
684b543aa1 experiment with nvcr pytorch image for torch 2.5.1 Wing Lian 2024-12-06 11:07:27 -05:00
5bef19064b [tests] reset known modules that are patched on each test function end (#2147) Wing Lian 2024-12-07 17:24:46 -05:00
da3d8f31c6 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-07 10:03:54 +00:00
743ba62bd5 Transformers 4.47.0 (#2138) Wing Lian 2024-12-07 05:03:01 -05:00
f9a7748bd8 Fix llama type model check (#2142) [skip ci] Chirag Jain 2024-12-07 15:32:32 +05:30
31723ac523 fix whitespace for patch check transformers-4_47_0_v2 Wing Lian 2024-12-06 16:43:44 -05:00
2e9e423dfd detab the code to check Wing Lian 2024-12-06 16:42:29 -05:00
cbe61186dc patches for llama ga Wing Lian 2024-12-06 16:40:24 -05:00
3c07b6d6b1 lint bursteratom 2024-12-06 16:06:57 -05:00
89dae7dc6d lora_target_module bursteratom 2024-12-06 15:41:09 -05:00
1b54af8e54 lora config bursteratom 2024-12-06 15:27:18 -05:00
ca7b56cba3 lora config bursteratom 2024-12-06 15:26:06 -05:00
2a83580bdc also bump accelerate Wing Lian 2024-12-06 15:24:57 -05:00
ea8269d2eb lora config bursteratom 2024-12-06 15:23:24 -05:00
13ca7ed087 comment out lora target bursteratom 2024-12-06 15:21:08 -05:00
0dfd8541ee lora config qwen2vl bursteratom 2024-12-06 14:56:51 -05:00
75e1d3537f qwen2_vl get_text_config bursteratom 2024-12-06 14:54:06 -05:00
825f66b9fd update HF HUB env var and fix reward trainer log since it doesn't directly override log Wing Lian 2024-12-06 14:52:59 -05:00
2b7f3bd6ab qwen2_vl get_text_config bursteratom 2024-12-06 14:52:17 -05:00
d85a229afe get_text_config bursteratom 2024-12-06 14:50:05 -05:00
355cd7c872 update is_multimodal requirement to include qwen2_vl bursteratom 2024-12-06 14:43:50 -05:00
eab1638686 lint bursteratom 2024-12-06 14:37:32 -05:00
a3a4d22709 config init qwen2-vl chat template bursteratom 2024-12-06 14:24:03 -05:00
f9eb7d8663 qwen2 example bursteratom 2024-12-06 14:22:08 -05:00
343771a6d3 lint bursteratom 2024-12-06 13:15:49 -05:00
d2c32d0cba lint bursteratom 2024-12-06 13:04:42 -05:00
cec9887609 add llava chat template to config bursteratom 2024-12-06 12:57:20 -05:00
88b2cae748 llava template bursteratom 2024-12-06 12:54:43 -05:00
3b44989205 skip parent, call grandparent - yeah, super janky Wing Lian 2024-12-06 12:19:14 -05:00
aea2565938 for test only bursteratom 2024-12-06 11:54:07 -05:00
811224d7b7 broken 🦥 with latest transformers Wing Lian 2024-12-06 11:34:06 -05:00
84a14fc604 fix trl trainer.log interfaces Wing Lian 2024-12-06 10:35:29 -05:00
86cf62ca46 fix: update trainer.log signature NanoCode012 2024-11-25 18:31:43 +07:00
fc54e10455 bump transformers and trl Wing Lian 2024-12-06 10:27:12 -05:00
61dd11c23b Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 15:21:31 +00:00
cd7ff99173 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 15:20:46 +00:00
5e9fa33f3d reduce test concurrency to avoid HF rate limiting, test suite parity (#2128) Wing Lian 2024-12-06 10:20:20 -05:00
08fa133177 Fix broken CLI; remove duplicate metadata from setup.py (#2136) Dan Saunders 2024-12-06 10:19:54 -05:00
9e7f6983de Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 14:09:51 +00:00
6b3058b2dc upgrade bnb 0.45.0 and peft 0.14.0 (#2126) Wing Lian 2024-12-06 09:08:55 -05:00
197389a388 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 03:38:24 +00:00
5726141c4e remove accidentally included symlink (#2131) Wing Lian 2024-12-05 22:37:19 -05:00
157ee933d1 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 03:13:49 +00:00
0c807e1cb8 Built site for gh-pages Quarto GHA Workflow Runner 2024-12-06 03:12:45 +00:00
2f3ebbc44f auto-versioning and adding axolotl.__version__ (#2127) Dan Saunders 2024-12-05 22:12:40 -05:00
fc973f4322 CLI Implementation with Click (#2107) Dan Saunders 2024-12-05 22:11:48 -05:00
e399ba533e fix license header for fix_untrained_tokens from unsloth-zoo (#2129) [skip ci] Wing Lian 2024-12-05 21:20:40 -05:00
4baf8e5e96 cleanup the readme, add Modal as sponsor (#2130) [skip ci] Wing Lian 2024-12-05 21:19:52 -05:00
1ad56303b2 lint bursteratom 2024-12-05 15:34:04 -05:00
dc055a4ef7 lint bursteratom 2024-12-05 14:59:51 -05:00
169116a50f llava example bursteratom 2024-12-05 12:58:30 -05:00
43e412f660 comment bursteratom 2024-12-04 13:18:25 -05:00
7aa57803e1 fix optimizer reset for relora sft (#1414) Wing Lian 2024-12-03 08:58:23 -05:00
1969fa3bf0 fix(readme): update cuda instructions during preprocess (#2114) [skip ci] NanoCode012 2024-12-03 20:58:03 +07:00
4078f37076 feat: add cut_cross_entropy (#2091) NanoCode012 2024-12-03 20:22:22 +07:00
f073af6d99 fix merge conflict of duplicate max_steps in config for relora (#2116) Wing Lian 2024-12-03 07:42:41 -05:00
139d2612fa fix so inference can be run against quantized models without adapters (#1834) Wing Lian 2024-12-03 00:02:38 -05:00
20573fd13e Add ds model card, rebased (#2101) [skip ci] Sunny Liu 2024-12-03 00:02:02 -05:00
2b7b4af81c fix(vlm): handle legacy conversation data format and check image in data (#2018) [skip ci] NanoCode012 2024-12-03 12:01:31 +07:00
d56260c8d5 Check torch version for ADOPT optimizer + integrating new ADOPT updates (#2104) Sunny Liu 2024-12-02 20:15:39 -05:00
cac785ec0e use pytest sugar and verbose for more info during ci (#2112) [skip ci] Wing Lian 2024-12-02 20:14:40 -05:00
e62991edef make the eval size smaller for the resume test (#2111) [skip ci] Wing Lian 2024-12-02 18:32:29 -05:00
fd9e7b55f6 build causal_conv1d and mamba-ssm into the base image (#2113) Wing Lian 2024-12-02 18:27:46 -05:00
c0c53eb62f various tests fixes for flakey tests (#2110) Wing Lian 2024-12-02 17:28:58 -05:00
b0fbd4d11d Add Exact Deduplication Feature to Preprocessing Pipeline (#2072) Oliver Molenschot 2024-12-02 05:47:10 -08:00
1a70d4d6a4 add e2e tests for Unsloth qlora and test the builds (#2093) Wing Lian 2024-11-29 20:38:49 -05:00
d8787a433f support separate lr for embeddings, similar to loraplus (#1910) [skip ci] Wing Lian 2024-11-29 20:38:20 -05:00
e775422269 fix: ds3 and fsdp lmbench eval (#2102) [skip ci] NanoCode012 2024-11-30 08:37:49 +07:00
97178f5960 add finetome dataset to fixtures, check eval_loss in test (#2106) [skip ci] Wing Lian 2024-11-29 20:37:32 -05:00

... 31 32 33 34 35 ...