diff --git a/docs/llava.md b/docs/llava.md new file mode 100644 index 000000000..3363a442b --- /dev/null +++ b/docs/llava.md @@ -0,0 +1,36 @@ +# LLaVA + +### Installing dependencies + +```shell +git clone https://github.com/haotian-liu/LLaVA.git +cd LLaVA +pip install --no-deps -e . +``` + +### Downloading assets + +LLaVA doesn't support remote datasets, so both the JSON and image assets need to be downloaded locally. + +```shell +mkdir llava +mkdir data +cd llava +curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/images.zip +unzip images.zip + +cd ../data +curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/blip_laion_cc_sbu_558k.json +``` + +### Pretraining + +Pretraining aligns the vision model with the language model. + +```shell +accelerate launch -m axolotl.cli.train_mm examples/multimodal/pretrain-llava-llama.yml +``` + +### Finetuning + +TBD diff --git a/examples/multimodal/pretrain-llava-llama.yml b/examples/multimodal/pretrain-llava-llama.yml index 855629ee4..533cec41f 100644 --- a/examples/multimodal/pretrain-llava-llama.yml +++ b/examples/multimodal/pretrain-llava-llama.yml @@ -17,7 +17,6 @@ strict: false datasets: - path: ./data/blip_laion_cc_sbu_558k.json -# - path: liuhaotian/LLaVA-CC3M-Pretrain-595K dataset_prepared_path: val_set_size: 0.0 output_dir: ./out @@ -54,7 +53,7 @@ xformers_attention: flash_attention: true warmup_steps: 10 -eval_steps: 0.05 +eval_steps: save_steps: debug: deepspeed: diff --git a/examples/multimodal/pretrain-llava-mistral.yml b/examples/multimodal/pretrain-llava-mistral.yml index 2d85a480c..ca113c168 100644 --- a/examples/multimodal/pretrain-llava-mistral.yml +++ b/examples/multimodal/pretrain-llava-mistral.yml @@ -16,7 +16,7 @@ load_in_4bit: false strict: false datasets: - - path: liuhaotian/LLaVA-CC3M-Pretrain-595K + - path: ./data/blip_laion_cc_sbu_558k.json dataset_prepared_path: val_set_size: 0.0 output_dir: ./out @@ -53,7 +53,7 @@ xformers_attention: 
flash_attention: true warmup_steps: 10 -eval_steps: 0.05 +eval_steps: save_steps: debug: deepspeed: