From 21c8e2deabdd08408abe3d4c75cf18e00bc2f30b Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 28 May 2023 14:36:33 -0400 Subject: [PATCH 1/3] refactor conversation plucking in sharegpt --- src/axolotl/prompt_tokenizers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py index bfe6fc877..a91a4e2d3 100644 --- a/src/axolotl/prompt_tokenizers.py +++ b/src/axolotl/prompt_tokenizers.py @@ -268,6 +268,9 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy): class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy): + def get_conversation_thread(self, prompt): + return prompt["conversations"] + def tokenize_prompt(self, prompt): result = { "input_ids": [], @@ -279,7 +282,7 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy): assistant_token = self._get_assistant_token() try: for i, part in enumerate( - self.prompter.build_prompt(prompt["conversations"]) + self.prompter.build_prompt(self.get_conversation_thread(prompt)) ): if isinstance(part, tuple): if part[0] == "USER:": From 21f17cca691d5df41863184a188b259feada99bb Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 29 May 2023 00:06:35 -0400 Subject: [PATCH 2/3] bnb fixes --- .github/workflows/base.yml | 2 +- docker/Dockerfile-base | 2 +- scripts/finetune.py | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 571faf771..5a3f90992 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -1,4 +1,4 @@ -name: ci-cd +name: ci-cd-base on: push: diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 4f4431dbe..6399d60ee 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -90,7 +90,7 @@ COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotar COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl -RUN cd /workspace/builds/bitsandbytes && python3 setup.py install +RUN cd /workspace/builds/bitsandbytes && cp bitsandbytes/libbitsandbytes_cuda.so bitsandbytes/libbitsandbytes_cuda${CUDA_VERSION_BNB}.so && python3 setup.py install RUN git lfs install --skip-repo RUN pip3 install "peft @ git+https://github.com/huggingface/peft.git@main" \ "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \ diff --git a/scripts/finetune.py b/scripts/finetune.py index 1d1eb9f95..58f1c0957 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -178,6 +178,15 @@ def train( tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH ) + if cfg.debug or "debug" in kwargs: + logging.info("check_dataset_labels...") + check_dataset_labels( + train_dataset.select( + [random.randrange(0, len(train_dataset) - 1) for i in range(5)] + ), + tokenizer, + ) + if prepare_ds_only: logging.info("Finished preparing dataset. Exiting...") return @@ -213,15 +222,6 @@ def train( model.save_pretrained(cfg.output_dir) return - if cfg.debug: - logging.info("check_dataset_labels...") - check_dataset_labels( - train_dataset.select( - [random.randrange(0, len(train_dataset) - 1) for i in range(5)] - ), - tokenizer, - ) - trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer) model.config.use_cache = False From 00323f0a6fa58a42730d7a8c2331d0a467c46c2c Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 29 May 2023 08:06:22 -0400 Subject: [PATCH 3/3] fix CUDA_VERSION_BNB env var --- docker/Dockerfile-base | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 6399d60ee..510d038ec 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -62,6 +62,7 @@ RUN git clone https://github.com/microsoft/DeepSpeed.git && \ FROM base-builder AS bnb-builder WORKDIR /workspace +ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \ cd bitsandbytes && \ @@ -70,6 +71,8 @@ RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \ FROM base-builder +ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB + # recompile apex RUN python3 -m pip uninstall -y apex RUN git clone https://github.com/NVIDIA/apex @@ -90,7 +93,7 @@ COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotar COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl -RUN cd /workspace/builds/bitsandbytes && cp bitsandbytes/libbitsandbytes_cuda.so bitsandbytes/libbitsandbytes_cuda${CUDA_VERSION_BNB}.so && python3 setup.py install +RUN cd /workspace/builds/bitsandbytes && python3 setup.py install RUN git lfs install --skip-repo RUN pip3 install "peft @ git+https://github.com/huggingface/peft.git@main" \ "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \