Merge branch 'main' of https://github.com/OpenAccess-AI-Collective/axolotl into qlora-openllama-3b-example

This commit is contained in:
jphillips
2023-05-29 09:09:43 -05:00
4 changed files with 17 additions and 11 deletions

View File

@@ -1,4 +1,4 @@
name: ci-cd name: ci-cd-base
on: on:
push: push:

View File

@@ -62,6 +62,7 @@ RUN git clone https://github.com/microsoft/DeepSpeed.git && \
FROM base-builder AS bnb-builder FROM base-builder AS bnb-builder
WORKDIR /workspace WORKDIR /workspace
ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \ RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
cd bitsandbytes && \ cd bitsandbytes && \
@@ -70,6 +71,8 @@ RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
FROM base-builder FROM base-builder
ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
# recompile apex # recompile apex
RUN python3 -m pip uninstall -y apex RUN python3 -m pip uninstall -y apex
RUN git clone https://github.com/NVIDIA/apex RUN git clone https://github.com/NVIDIA/apex

View File

@@ -178,6 +178,15 @@ def train(
tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
) )
if cfg.debug or "debug" in kwargs:
logging.info("check_dataset_labels...")
check_dataset_labels(
train_dataset.select(
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
),
tokenizer,
)
if prepare_ds_only: if prepare_ds_only:
logging.info("Finished preparing dataset. Exiting...") logging.info("Finished preparing dataset. Exiting...")
return return
@@ -213,15 +222,6 @@ def train(
model.save_pretrained(cfg.output_dir) model.save_pretrained(cfg.output_dir)
return return
if cfg.debug:
logging.info("check_dataset_labels...")
check_dataset_labels(
train_dataset.select(
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
),
tokenizer,
)
trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer) trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
model.config.use_cache = False model.config.use_cache = False

View File

@@ -268,6 +268,9 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy):
class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy): class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
def get_conversation_thread(self, prompt):
return prompt["conversations"]
def tokenize_prompt(self, prompt): def tokenize_prompt(self, prompt):
result = { result = {
"input_ids": [], "input_ids": [],
@@ -279,7 +282,7 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
assistant_token = self._get_assistant_token() assistant_token = self._get_assistant_token()
try: try:
for i, part in enumerate( for i, part in enumerate(
self.prompter.build_prompt(prompt["conversations"]) self.prompter.build_prompt(self.get_conversation_thread(prompt))
): ):
if isinstance(part, tuple): if isinstance(part, tuple):
if part[0] == "USER:": if part[0] == "USER:":