Merge pull request #104 from OpenAccess-AI-Collective/training-fixes-20230529
bnb fix, trainer debug fix
This commit is contained in:
2
.github/workflows/base.yml
vendored
2
.github/workflows/base.yml
vendored
@@ -1,4 +1,4 @@
-name: ci-cd
+name: ci-cd-base
 
 on:
   push:
@@ -90,7 +90,7 @@ COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotar
 COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels
 
 RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl
-RUN cd /workspace/builds/bitsandbytes && python3 setup.py install
+RUN cd /workspace/builds/bitsandbytes && cp bitsandbytes/libbitsandbytes_cuda.so bitsandbytes/libbitsandbytes_cuda${CUDA_VERSION_BNB}.so && python3 setup.py install
 RUN git lfs install --skip-repo
 RUN pip3 install "peft @ git+https://github.com/huggingface/peft.git@main" \
     "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \
@@ -178,6 +178,15 @@ def train(
         tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
     )
 
+    if cfg.debug or "debug" in kwargs:
+        logging.info("check_dataset_labels...")
+        check_dataset_labels(
+            train_dataset.select(
+                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
+            ),
+            tokenizer,
+        )
+
     if prepare_ds_only:
         logging.info("Finished preparing dataset. Exiting...")
         return
@@ -213,15 +222,6 @@ def train(
         model.save_pretrained(cfg.output_dir)
         return
 
-    if cfg.debug:
-        logging.info("check_dataset_labels...")
-        check_dataset_labels(
-            train_dataset.select(
-                [random.randrange(0, len(train_dataset) - 1) for i in range(5)]
-            ),
-            tokenizer,
-        )
-
     trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
 
     model.config.use_cache = False
Reference in New Issue
Block a user