axolotl

tocmo0nlord/axolotl

Fork 0

Commit Graph

Select branches

Hide Pull Requests

1947fix

1991test

20230920-btlm

20231212-fixes

20240216-updates

20240307-updates

20240404-lisa-determinism

3181

4bit-optimizers

775-option-to-drop-vs-truncate-on-rows-longer-than-context-length

NanoCode012-patch-1

accelerator-args-builder

activation-offloading-torchtune

activations

activeblue/main

async-grpo-patched-v2

attention_enum

attn-implementation-refactor

attn-patches

autodoc

autogptq-tests

axolotl-ci-hf

base-model-readme-update

benchmark-callbacks-next

bursteratom-doc-faq-update

chat-dataset-tool

chat-template-granite

chore/docstring-distributed

cj_tokenizer_default_prompt_template

cli-cloud-modal-math-hard

cli-refactor

codecov-pulls-only

coderabbitai/docstrings/3e51a68

coderabbitai/docstrings/QVUilv72ojQNaYsCLVNpUpfo2rK1ZU5x90oPNXYz0ZfsWzWSHca36pjgaU5JOtZOA4gNjbjVYxShdRmkm7fGSlW

coderabbitai/docstrings/b234532

colab-misc-fixes

colab-misc-fixes-test

completion-json

compute-perplexity-metrics

cp-sdpa

cuda-12.8.1

custom-modeling

custom-trainer-cls

datasets-351

datasets-refactor

debug-hf-home-cache

deepspeed-low-cpu-mem

deepspeed_0_14_4

destroy-pg

dev-base

device-mesh

devstral-support

dft

diff-transformer

diffusion-custom-loss

diffusion-custom-models

diffusion-next-token-trainer

djsaunde-patch-1

docker-base-nvcr-pytorch

docker-cleanup-20231029

docs-lint-20250212

dpo-spawn-fix

dump-config

dynamic-sft

e2e-fsdp-trainer

embeddings-resize

enable_tp

eos-hell

exp-expand-len

fa-261

fa-check

fa3-hopper

feat/beautiful-readme

feat/glm45

feat/glmflash-other

feat/liger-deepseekv3

feat/linearize

feat/lmeval-baseten

feat/phi_35_vision

feat/pref_liger

feat/soap-optim-v2

feat/spaces-ui

feat/torchao-qlora

feat/wizard

feat_hqq

feature/attn-patches

feature/enable-huggingface-dataset-revision

feature/relora-rebased

fix-ddp_find_unused_parameters

fix-l3-lora

fix-merge-lint-issue

fix-preview

fix/cce-linear

fix/cp-waste

fix/diffusion

fix/doc-key

fix/dpo-labels

fix/eval-accu

fix/gemma3-text-only

fix/gemma3n-text-attention

fix/granite-speech

fix/hpc-root

fix/issue-1-build-deps

fix/issue-2-flash-attn-install

fix/issue-3-telemetry-whitelist

fix/issue-4-deepspeed-optional

fix/issue-5-8-docs

fix/issue-6-default-attention

fix/issue-7-hf-token-check

fix/kd-trainer-num-items

fix/merge-lora-fp32

fix/replace_jackllama

fix/rl-trainer-arg

fix/vllm-version

fix/xformers

fix_kto

fixtypo

flan-no-bos

flash-attn-2_5_5

flash-attn-fix-patches-wo-sample-packing

flex_patching_update

flx_attn_support

fp8

fsdp-defaults

fsdp-fft

fsdp-fix

fsdp-qdora

fsdp2

fsdp2_fp32

fused-mlp-ez

gh-pages

grouped_lr_squashed

grpo-path

grpo-path-v2

grpo-ref-model-cleanup

grpo_liger

hamelsmu-patch-1

hf-trainer-refactor

hymba_multipack2

ia3-peft

iterable-optional

jagged-restart-lr-scheduler-v3

kd-fix-20250519-v2

kd-logits-view

kd-logprob-data

kd-trainer

kd-trainer-2

kd-trainer-pre

kd-trainer-rebased

kd-trainer-v2

kd-trainer-zscore

keep_in_memory

kernelize-scattermoe-lora

kto_fix

kwargs-refactor

latent-space

lhl-moe-aux-loss-free

liger-063

liger-065

liger-dpo

lisa

llama-4-examples

llama-4-z3

llama-dropout

llama-flash-attn-fix

llama-multipack

llama4

llama4-patches

llava

llava-train

llmcompressor-sft

llmcompressor-sft-v2

llmcompressor-sft-wing

lora-fsdp2-doc

lora-kernels-deepspeed

lora-kernels-doc-fix

lora-quant-state-offset

lora_bf16

lora_kernels_fsdp

main

main-base

map-dataset-fetcher-fix

maverick-example

merge-lora-on-complete

merge-lora-tests

merged-2554

mistral-support

mixtral_optimized

mixtral_swiglu

mm2

mm3

mm_mc_chat

modal-upgrade-builder

model-loader-refactor

moekernels

mora

multi-gpu-state

multipack

multipack-dpo

multipack-pretraining

muon-validation

nca-pair

nd_parallel

neft-v2

no-bos-tokens-packing

no-seq-len

no-zero-ds-train

offload-activations-disk

olmo-no-position_ids

online-topk-kd

openorca

openorca-fix-mask

openorca-v2

optimizer-checkpoint

optimizer-compile

optimizers-refactor

packing-attn-limit-fa2-rebased

patch_lora_post_model_load

peft-update

phi-moe

pixtral_integration

pre-commit-update

preprocess_grpo-fix

pretrain-dataset

print_venv

pytest-each-flakey

pytest-skip-s2

q-galore

quantize-ptq-cli

quartodoc

quartodoc-fix

rala

rala-v2

reentrant-w-offloading

refactor-flash-attention

relaxed-recursive-transformers

release-0.10.x

release-0.8.x

release-v0.11.x

release-v0.12.x

release-v0.13.x

release-v0.9.x

remove-gptq-warn

revert-2332-fix_sample_packing

revert-2906-checkpoint-on-step-1

revert-multipack-changes

rl-trainers-sp

runpod-sls

sac

sageattention

save_only_model

scatter_moe

scatter_moe_eric

scattermoe-lora-optim-dtypestest

scattermoe-nanotron

sdpa-cp

sdpa-multipack

seq-parallel-ring

sequence-parallelism

shampoo

shampoo-low_bit

shared-prepared-ci

sharegpt-batched

sharegpt-field-conversations

smaller-rand-model

smol-ci

soap-optim

sp-fix-masking

sp-restore-buffers

sp-rl

sp-rl-v3

split-batches-sizes

sppo

squash_position_ids

ssmi-main

stable

streaming

streaming-on-the-fly-preprocess

streaming-remote-dataset

streaming-v2

swe-rebench-rl-rebase

telemetry

telemetry-opt-in

tensor-parallel

tensorboard-loss-check

testingci

textui

tinyllama-example

tool-mpm

topk-logprobs-triton

torch-211-base

torch_tensor_parallel

tp_support

train-refactor

transformers-4511

transformers-4513

transformers-4573

transformers-4_47_0_v2

transformers-fsdp-check

transformers-itl-refactor

tui

unsloth_modules

update-examples-llama3-ez

update-lgpl

update-vllm

upgrade-liger-test

upgrade-torchao-0.15

upgrade-trl-v0.12.0_2

upgrade_liger-tr4.46.1

uv-first

uv-fixup

vendor-moe

version-dev

vllm-0191

wait-distributed-close

weight-scale-norm

xformers-wo-packing

yayi2

zero3-8bit-lora

v0.1.0

v0.10.0

v0.10.1

v0.11.0

v0.11.0.post1

v0.12.0

v0.12.1

v0.12.2

v0.13.0

v0.13.1

v0.13.2

v0.14.0

v0.15.0

v0.16.0

v0.16.1

v0.2.0

v0.2.1

v0.3.0

v0.4.0

v0.5.0

v0.5.1

v0.5.1.post1

v0.5.2

v0.6.0

v0.7.0

v0.7.1

v0.8.0

v0.8.1

v0.9.0

v0.9.1

v0.9.1.post1

v0.9.2

73450d9de7 TORCH_CUDA_ARCH_LIST should be an ARG Wing Lian 2023-05-07 07:28:57 -04:00
97cf77891e run this on self hosted runner for now Wing Lian 2023-05-07 04:30:19 -04:00
e2599edab9 runs on larger git runner? Wing Lian 2023-05-07 04:12:47 -04:00
75bc8561c0 don't push the image Wing Lian 2023-05-07 03:39:05 -04:00
15bdbae805 run on git commit Wing Lian 2023-05-07 03:37:59 -04:00
6603b3744e try docker build on gitlab Wing Lian 2023-05-07 03:10:14 -04:00
2634689774 build dockerfile in gha Wing Lian 2023-05-07 02:58:21 -04:00
4818380fa6 update stablelm config Wing Lian 2023-05-07 01:58:23 -04:00
247825bd57 refactor inference, warn if model is frozen Wing Lian 2023-05-07 01:53:30 -04:00
cb9a887047 Merge pull request #13 from winglian/dev Wing Lian 2023-05-07 01:48:02 -04:00
a15d823b29 Merge pull request #12 from NanoCode012/feat/eval_config Wing Lian 2023-05-07 01:46:53 -04:00
0e74b6402e Add eval_batch_size for evaluation NanoCode012 2023-05-06 22:21:24 +09:00
a10a8265ef fix log sweep lr Wing Lian 2023-05-03 15:06:03 -04:00
9105935b00 support for multi line inference input, log sweep over learning rates Wing Lian 2023-05-03 13:48:54 -04:00
7748f3d6da fix adam bnb optimizer grouped parameters, fix peft model 8bit conversion logic, black formatting Wing Lian 2023-05-01 16:31:46 -04:00
fe9c29d73e install peft from main branch Wing Lian 2023-05-01 12:24:04 -04:00
2255bb7f4f support llama-adapter zero init attention Wing Lian 2023-05-01 10:42:21 -04:00
55baef0e03 use prebuilt wheels for flash-attn and deepspeed Wing Lian 2023-05-01 09:52:03 -04:00
ad2b48c0fa fdsp config dict fix, todo list, add torchdistx support Wing Lian 2023-04-30 13:32:07 -04:00
9190ada23a 8bit and deepspeed changes Wing Lian 2023-04-30 06:50:35 -04:00
4dbef0941f update ds_config Wing Lian 2023-04-30 04:24:58 -04:00
6dfdd2dec0 don't load models in 8bit unless they are using an adapter, also fix tokenizer load in exceptional case Wing Lian 2023-04-30 03:19:56 -04:00
29936bba7f fix fsdp training args Wing Lian 2023-04-30 00:56:28 -04:00
78821815de fix for zero value warmup steps Wing Lian 2023-04-30 00:34:12 -04:00
5159d00a86 fix sharegpt tokenization, refactor tokenization debugging Wing Lian 2023-04-30 00:23:53 -04:00
c0f50d9c61 wire up gradient checkpointing for 4bit Wing Lian 2023-04-28 22:27:33 -04:00
4e705eda6d Merge pull request #9 from winglian/dev Wing Lian 2023-04-24 21:56:17 -04:00
4a17a4c9a1 fix dataset handling, support galactica Wing Lian 2023-04-24 10:54:45 -04:00
097d367af6 tweaks to data loading, 8 bit adam, accelerate and deepspeed Wing Lian 2023-04-22 16:25:23 -04:00
4f2584f2dc shuffle and split dataset after save/load Wing Lian 2023-04-20 14:39:47 -04:00
8d437853c8 fix sharegpt handling from hf, don't worry about loading llama if using earlier transformers release Wing Lian 2023-04-20 09:19:46 -04:00
8e2a5609b3 stablelm support Wing Lian 2023-04-19 18:13:51 -04:00
94f5e415a3 various bugfixes Wing Lian 2023-04-19 17:04:34 -04:00
2624bc2f11 ignore config, add python 3.9 (#8) Eric Hartford 2023-04-24 04:23:19 -07:00
bb991fd870 fix bug when model_type not explicitly passed Wing Lian 2023-04-19 13:15:33 -04:00
d65385912e improve inference Wing Lian 2023-04-19 12:57:27 -04:00
5749eb0a1c fix runpod script Wing Lian 2023-04-19 08:39:54 -04:00
7753cdee57 cleanup empty lines, tweak env for runpod setup Wing Lian 2023-04-19 08:24:58 -04:00
f50de1b1cb handle empty lines Wing Lian 2023-04-19 08:03:34 -04:00
0a472e1e08 quickstart instructions for starting from runpod (#5) Wing Lian 2023-04-18 19:22:25 -04:00
5cb7ea49a6 update readme w compat matrix Wing Lian 2023-04-18 14:42:37 -04:00
8746b701fe attempt xformers hijack attention Wing Lian 2023-04-18 10:44:56 -04:00
6045345d6b WIP large refactor to make finetune script a little more manageable (#3) Wing Lian 2023-04-18 14:01:38 -04:00
81de0efc18 add support for alpaca reflect training (#2) stable Wing Lian 2023-04-18 08:34:05 -04:00
34af1b465f update readme Wing Lian 2023-04-18 01:58:32 -04:00
87d7825435 Tokenization open assistant (#1) Wing Lian 2023-04-18 01:45:49 -04:00
eb808903e5 fix llama check Wing Lian 2023-04-18 01:19:53 -04:00
3f3f561c06 update readme Wing Lian 2023-04-18 00:45:25 -04:00
8f36f3cd5a fix conditional check to prevent always using 4bit Wing Lian 2023-04-18 00:35:03 -04:00
69164da079 imrpove llama check and fix safetensors file check Wing Lian 2023-04-17 23:49:21 -04:00
e1076430ff suppport for alpaca-like instruction datasets without inputs Wing Lian 2023-04-17 23:32:57 -04:00
2db9436410 casts the prepared data to int16 (doesn't help with training memory) Wing Lian 2023-04-17 21:36:02 -04:00
120e7df7df bugfixes Wing Lian 2023-04-17 18:23:55 -04:00
87e073d0de fix lora target module, require explicit flash attention, fix min logging steps, don't use adam8bit for int4, hash prepared datasets, support hf hub datasets Wing Lian 2023-04-17 18:01:12 -04:00
4131183115 fix install to work with latest alpaca lora 4bit Wing Lian 2023-04-17 12:45:12 -04:00
77fca25f1b 4bit quantized support (wip) Wing Lian 2023-04-17 11:37:39 -04:00
12de7b7cf7 cleanup, prep for 4bit quant support Wing Lian 2023-04-16 11:06:41 -04:00
d1aed4c8e5 deepspeed doesn't work with flash-attn, and the gpu savings w flash attn are better than the deepspeed headaches Wing Lian 2023-04-16 06:59:47 -04:00
a4593832a9 fix logging Wing Lian 2023-04-15 23:12:48 -04:00
23938015c8 prepare datasets only flag Wing Lian 2023-04-15 16:30:55 -04:00
d060c803ce add llama 7b config and fiz lora_fan_in_fan_out for llama (copy pasta bug) Wing Lian 2023-04-15 14:26:52 -04:00
d33a975747 configure log level, add llama 7b config Wing Lian 2023-04-15 14:24:37 -04:00
05fffb53b4 more logging, wandb fixes Wing Lian 2023-04-15 13:37:17 -04:00
2df63ef815 refactor trainer setup to account for deepspeed integration Wing Lian 2023-04-15 12:16:42 -04:00
b164725417 improve prepared dataset loading, fix inference Wing Lian 2023-04-15 12:14:52 -04:00
937f44f021 helpful info output Wing Lian 2023-04-15 00:03:43 -04:00
902dd0ab47 fix issue with completed model being empty Wing Lian 2023-04-14 23:57:55 -04:00
80b2ed29d8 various bugfixes Wing Lian 2023-04-14 21:37:07 -04:00
45f77dd51e bettter handling of llama model import Wing Lian 2023-04-14 19:30:41 -04:00
949a27be21 more fixes and prep for llama training Wing Lian 2023-04-14 18:30:09 -04:00
f2a2029d0d config chooser, update readme instructions, device config, llama flash attention, debug out the labels, fix config key checks, other bugfixes Wing Lian 2023-04-14 12:18:56 -04:00
a6028d302e black formatting Wing Lian 2023-04-14 07:25:52 -04:00
8d959a7e26 make it work with pythia in the cloud Wing Lian 2023-04-14 07:24:55 -04:00
ce24f5e246 WIP for axolotl trainer Wing Lian 2023-04-14 00:20:05 -04:00
e9da4b9a30 initial commit of README Wing Lian 2023-04-13 17:49:54 -04:00

... 54 55 56 57 58