Merge branch 'main' of https://github.com/OpenAccess-AI-Collective/axolotl into qlora-openllama-3b-example
This commit is contained in:
2
.github/workflows/base.yml
vendored
2
.github/workflows/base.yml
vendored
@@ -1,4 +1,4 @@
|
|||||||
name: ci-cd
|
name: ci-cd-base
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ RUN git clone https://github.com/microsoft/DeepSpeed.git && \
|
|||||||
FROM base-builder AS bnb-builder
|
FROM base-builder AS bnb-builder
|
||||||
|
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
|
||||||
|
|
||||||
RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
|
RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
|
||||||
cd bitsandbytes && \
|
cd bitsandbytes && \
|
||||||
@@ -70,6 +71,8 @@ RUN git clone https://github.com/TimDettmers/bitsandbytes.git && \
|
|||||||
|
|
||||||
FROM base-builder
|
FROM base-builder
|
||||||
|
|
||||||
|
ENV CUDA_VERSION_BNB=$CUDA_VERSION_BNB
|
||||||
|
|
||||||
# recompile apex
|
# recompile apex
|
||||||
RUN python3 -m pip uninstall -y apex
|
RUN python3 -m pip uninstall -y apex
|
||||||
RUN git clone https://github.com/NVIDIA/apex
|
RUN git clone https://github.com/NVIDIA/apex
|
||||||
|
|||||||
@@ -178,6 +178,15 @@ def train(
|
|||||||
tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
|
tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if cfg.debug or "debug" in kwargs:
|
||||||
|
logging.info("check_dataset_labels...")
|
||||||
|
check_dataset_labels(
|
||||||
|
train_dataset.select(
|
||||||
|
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
|
||||||
|
),
|
||||||
|
tokenizer,
|
||||||
|
)
|
||||||
|
|
||||||
if prepare_ds_only:
|
if prepare_ds_only:
|
||||||
logging.info("Finished preparing dataset. Exiting...")
|
logging.info("Finished preparing dataset. Exiting...")
|
||||||
return
|
return
|
||||||
@@ -213,15 +222,6 @@ def train(
|
|||||||
model.save_pretrained(cfg.output_dir)
|
model.save_pretrained(cfg.output_dir)
|
||||||
return
|
return
|
||||||
|
|
||||||
if cfg.debug:
|
|
||||||
logging.info("check_dataset_labels...")
|
|
||||||
check_dataset_labels(
|
|
||||||
train_dataset.select(
|
|
||||||
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
|
|
||||||
),
|
|
||||||
tokenizer,
|
|
||||||
)
|
|
||||||
|
|
||||||
trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
|
trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
|
||||||
|
|
||||||
model.config.use_cache = False
|
model.config.use_cache = False
|
||||||
|
|||||||
@@ -268,6 +268,9 @@ class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy):
|
|||||||
|
|
||||||
|
|
||||||
class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
||||||
|
def get_conversation_thread(self, prompt):
|
||||||
|
return prompt["conversations"]
|
||||||
|
|
||||||
def tokenize_prompt(self, prompt):
|
def tokenize_prompt(self, prompt):
|
||||||
result = {
|
result = {
|
||||||
"input_ids": [],
|
"input_ids": [],
|
||||||
@@ -279,7 +282,7 @@ class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy):
|
|||||||
assistant_token = self._get_assistant_token()
|
assistant_token = self._get_assistant_token()
|
||||||
try:
|
try:
|
||||||
for i, part in enumerate(
|
for i, part in enumerate(
|
||||||
self.prompter.build_prompt(prompt["conversations"])
|
self.prompter.build_prompt(self.get_conversation_thread(prompt))
|
||||||
):
|
):
|
||||||
if isinstance(part, tuple):
|
if isinstance(part, tuple):
|
||||||
if part[0] == "USER:":
|
if part[0] == "USER:":
|
||||||
|
|||||||
Reference in New Issue
Block a user