add docs and tweak yml

This commit is contained in:
Wing Lian
2023-10-28 13:07:59 -04:00
parent 7ff30c4033
commit 1321608dc4
3 changed files with 39 additions and 4 deletions

36
docs/llava.md Normal file
View File

@@ -0,0 +1,36 @@
# LLaVA
### Installing dependencies
```shell
git clone https://github.com/haotian-liu/LLaVA.git
cd LLaVA
pip install --no-deps -e .
```
### Downloading assets
LLaVA doesn't support remote datasets, so both the JSON annotations and the image assets need to be downloaded locally.
```shell
mkdir llava
mkdir data
cd llava
curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/images.zip
unzip images.zip
cd ../data
curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/blip_laion_cc_sbu_558k.json
```
### Pretraining
Pretraining aligns the vision model with the language model.
```shell
accelerate launch -m axolotl.cli.train_mm examples/multimodal/pretrain-llava-llama.yml
```
### Finetuning
TBD

View File

@@ -17,7 +17,6 @@ strict: false
datasets:
- path: ./data/blip_laion_cc_sbu_558k.json
# - path: liuhaotian/LLaVA-CC3M-Pretrain-595K
dataset_prepared_path:
val_set_size: 0.0
output_dir: ./out
@@ -54,7 +53,7 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
eval_steps:
save_steps:
debug:
deepspeed:

View File

@@ -16,7 +16,7 @@ load_in_4bit: false
strict: false
datasets:
- path: liuhaotian/LLaVA-CC3M-Pretrain-595K
- path: ./data/blip_laion_cc_sbu_558k.json
dataset_prepared_path:
val_set_size: 0.0
output_dir: ./out
@@ -53,7 +53,7 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
eval_steps:
save_steps:
debug:
deepspeed: