add docs and tweak yml
This commit is contained in:
36
docs/llava.md
Normal file
36
docs/llava.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# LLaVA
|
||||||
|
|
||||||
|
### Installing dependencies
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone https://github.com/haotian-liu/LLaVA.git
|
||||||
|
cd LLaVA
|
||||||
|
pip install --no-deps -e .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Downloading assets
|
||||||
|
|
||||||
|
LLaVA doesn't support remote datasets, so both the JSON file and the image assets need to be downloaded locally.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir llava
|
||||||
|
mkdir data
|
||||||
|
cd llava
|
||||||
|
curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/images.zip
|
||||||
|
unzip images.zip
|
||||||
|
|
||||||
|
cd ../data
|
||||||
|
curl -L -O https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/resolve/main/blip_laion_cc_sbu_558k.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pretraining
|
||||||
|
|
||||||
|
Pretraining aligns the vision model with the language model.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
accelerate launch -m axolotl.cli.train_mm examples/multimodal/pretrain-llava-llama.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Finetuning
|
||||||
|
|
||||||
|
TBD (finetuning instructions have not been written yet).
|
||||||
@@ -17,7 +17,6 @@ strict: false
|
|||||||
|
|
||||||
datasets:
|
datasets:
|
||||||
- path: ./data/blip_laion_cc_sbu_558k.json
|
- path: ./data/blip_laion_cc_sbu_558k.json
|
||||||
# - path: liuhaotian/LLaVA-CC3M-Pretrain-595K
|
|
||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.0
|
val_set_size: 0.0
|
||||||
output_dir: ./out
|
output_dir: ./out
|
||||||
@@ -54,7 +53,7 @@ xformers_attention:
|
|||||||
flash_attention: true
|
flash_attention: true
|
||||||
|
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
eval_steps: 0.05
|
eval_steps:
|
||||||
save_steps:
|
save_steps:
|
||||||
debug:
|
debug:
|
||||||
deepspeed:
|
deepspeed:
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ load_in_4bit: false
|
|||||||
strict: false
|
strict: false
|
||||||
|
|
||||||
datasets:
|
datasets:
|
||||||
- path: liuhaotian/LLaVA-CC3M-Pretrain-595K
|
- path: ./data/blip_laion_cc_sbu_558k.json
|
||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.0
|
val_set_size: 0.0
|
||||||
output_dir: ./out
|
output_dir: ./out
|
||||||
@@ -53,7 +53,7 @@ xformers_attention:
|
|||||||
flash_attention: true
|
flash_attention: true
|
||||||
|
|
||||||
warmup_steps: 10
|
warmup_steps: 10
|
||||||
eval_steps: 0.05
|
eval_steps:
|
||||||
save_steps:
|
save_steps:
|
||||||
debug:
|
debug:
|
||||||
deepspeed:
|
deepspeed:
|
||||||
|
|||||||
Reference in New Issue
Block a user