Compare commits

..

2 Commits

Author SHA1 Message Date
Wing Lian
b7fe46579d make the conversations/messages field configurable for sharegpt 2024-03-08 08:08:29 -05:00
Wing Lian
638c2dafb5 JarvisLabs (#1372)
* add Jarvis cloud gpu and sponsorship

* whitespace
2024-03-07 10:47:32 -05:00
7 changed files with 23 additions and 33 deletions

View File

@@ -25,7 +25,7 @@ Features:
- [Environment](#environment)
- [Docker](#docker)
- [Conda/Pip venv](#condapip-venv)
- [Cloud GPU](#cloud-gpu) - Latitude.sh, RunPod
- [Cloud GPU](#cloud-gpu) - Latitude.sh, JarvisLabs, RunPod
- [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
- [Windows](#windows)
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
@@ -199,6 +199,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
- on Latitude.sh use this [direct link](https://latitude.sh/blueprint/989e0e79-3bf6-41ea-a46b-1f246e309d5c)
- on JarvisLabs.ai use this [direct link](https://jarvislabs.ai/templates/axolotl)
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
#### Bare Metal Cloud GPU
@@ -1298,4 +1299,6 @@ consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsor
#### 🥉 Bronze Sponsors - $500/mo
- [JarvisLabs.ai](https://jarvislabs.ai)
---

View File

@@ -741,6 +741,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
return AxolotlTrainer
def build(self, total_num_steps):
warmup_steps = None
if self.cfg.warmup_steps is not None:
warmup_steps = self.cfg.warmup_steps
elif self.cfg.warmup_ratio is not None:

View File

@@ -39,6 +39,8 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
)
if ds_cfg and "strict" in ds_cfg:
strategy.strict = ds_cfg["strict"]
if ds_cfg and "field_messages" in ds_cfg:
strategy.field_messages = ds_cfg["field_messages"]
return strategy
@@ -83,6 +85,7 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
"""
_strict = False
_field_messages = "conversations"
@property
def strict(self):
@@ -92,8 +95,16 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
def strict(self, strict):
self._strict = strict
@property
def field_messages(self):
return self._strict
@field_messages.setter
def field_messages(self, field_messages):
self._field_messages = field_messages
def get_conversation_thread(self, prompt):
conversations = prompt["conversations"]
conversations = prompt[self.field_messages]
if self.strict:
return conversations
role_key = "from"

View File

@@ -11,7 +11,7 @@ import torch
import transformers.modelcard
from accelerate.logging import get_logger
from datasets import Dataset
from peft import PeftModel, PeftModelForCausalLM
from peft import PeftModel
from pkg_resources import get_distribution # type: ignore
from transformers import PreTrainedModel, PreTrainedTokenizer
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
@@ -207,20 +207,6 @@ def train(
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
if cfg.adapter and isinstance(model, (PeftModel, PeftModelForCausalLM)):
model.to("cpu")
model = model.merge_and_unload()
if cfg.local_rank == 0:
LOG.info(f"saving merged model to: {str(Path(cfg.output_dir) / 'merged')}")
model.save_pretrained(
str(Path(cfg.output_dir) / "merged"),
safe_serialization=safe_serialization,
progressbar=True,
)
tokenizer.save_pretrained(str(Path(cfg.output_dir) / "merged"))
if not cfg.hub_model_id:
try:
trainer.create_model_card(model_name=cfg.output_dir.lstrip("./"))

View File

@@ -114,9 +114,7 @@ def prepare_dataset(cfg, tokenizer):
total_eval_steps = calculate_total_num_steps(cfg, eval_dataset, update=False)
if total_eval_steps == 0:
raise ValueError(
"eval dataset split is too small for sample_packing. "
"You should set `eval_sample_packing: False` "
"or decrease the value of `eval_batch_size`. "
"eval dataset split is too small for sample_packing. You should set `eval_sample_packing: False`. "
)
if cfg.max_steps:

View File

@@ -5,7 +5,7 @@ Multipack Batch Sampler
import logging
import math
import os
from typing import Any, Iterable, List, Union, Optional
from typing import Any, Iterable, List, Union
import numba
import numpy as np
@@ -115,14 +115,12 @@ class MultipackBatchSampler(BatchSampler):
batch_max_len: int,
lengths: np.ndarray,
packing_efficiency_estimate: float = 1.0,
consistent_length: Optional[bool] = False,
):
super().__init__(sampler, batch_size, drop_last)
self.batch_size = batch_size
self.batch_max_len = batch_max_len
self.lengths: np.ndarray = lengths
self.packing_efficiency_estimate = packing_efficiency_estimate or 1.0
self.consistent_length = consistent_length
assert isinstance(self.lengths, np.ndarray)
@@ -166,18 +164,11 @@ class MultipackBatchSampler(BatchSampler):
def __iter__(self):
batches = self.generate_batches(set_stats=True)
if self.consistent_length:
length = self._len_est()
return iter(batches[:length])
else:
return iter(batches)
return iter(batches)
def num_batches(self):
batches = self.generate_batches(set_stats=True)
if self.consistent_length:
return self._len_est()
else:
return len(batches)
return len(batches)
def efficiency(self):
return self.eff_total_used / self.eff_total_slots

View File

@@ -277,7 +277,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True):
calc_sample_packing_eff_est,
)
sample_packing_eff_est = (
math.ceil(sample_packing_actual_eff_all * 10000.0) / 10000.0
math.ceil(sample_packing_actual_eff_all * 100.0) / 100.0
)
if update:
cfg.sample_packing_eff_est = sample_packing_eff_est