Compare commits
1 Commits
sharegpt-f
...
20240307-u
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3b432346e3 |
@@ -25,7 +25,7 @@ Features:
|
|||||||
- [Environment](#environment)
|
- [Environment](#environment)
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
- [Conda/Pip venv](#condapip-venv)
|
- [Conda/Pip venv](#condapip-venv)
|
||||||
- [Cloud GPU](#cloud-gpu) - Latitude.sh, JarvisLabs, RunPod
|
- [Cloud GPU](#cloud-gpu) - Latitude.sh, RunPod
|
||||||
- [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
|
- [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
|
||||||
- [Windows](#windows)
|
- [Windows](#windows)
|
||||||
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
||||||
@@ -199,7 +199,6 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
|||||||
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
||||||
|
|
||||||
- on Latitude.sh use this [direct link](https://latitude.sh/blueprint/989e0e79-3bf6-41ea-a46b-1f246e309d5c)
|
- on Latitude.sh use this [direct link](https://latitude.sh/blueprint/989e0e79-3bf6-41ea-a46b-1f246e309d5c)
|
||||||
- on JarvisLabs.ai use this [direct link](https://jarvislabs.ai/templates/axolotl)
|
|
||||||
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||||
|
|
||||||
#### Bare Metal Cloud GPU
|
#### Bare Metal Cloud GPU
|
||||||
@@ -1299,6 +1298,4 @@ consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsor
|
|||||||
|
|
||||||
#### 🥉 Bronze Sponsors - $500/mo
|
#### 🥉 Bronze Sponsors - $500/mo
|
||||||
|
|
||||||
- [JarvisLabs.ai](https://jarvislabs.ai)
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -741,7 +741,6 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
|||||||
return AxolotlTrainer
|
return AxolotlTrainer
|
||||||
|
|
||||||
def build(self, total_num_steps):
|
def build(self, total_num_steps):
|
||||||
warmup_steps = None
|
|
||||||
if self.cfg.warmup_steps is not None:
|
if self.cfg.warmup_steps is not None:
|
||||||
warmup_steps = self.cfg.warmup_steps
|
warmup_steps = self.cfg.warmup_steps
|
||||||
elif self.cfg.warmup_ratio is not None:
|
elif self.cfg.warmup_ratio is not None:
|
||||||
|
|||||||
@@ -39,8 +39,6 @@ def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
|
|||||||
)
|
)
|
||||||
if ds_cfg and "strict" in ds_cfg:
|
if ds_cfg and "strict" in ds_cfg:
|
||||||
strategy.strict = ds_cfg["strict"]
|
strategy.strict = ds_cfg["strict"]
|
||||||
if ds_cfg and "field_messages" in ds_cfg:
|
|
||||||
strategy.field_messages = ds_cfg["field_messages"]
|
|
||||||
return strategy
|
return strategy
|
||||||
|
|
||||||
|
|
||||||
@@ -85,7 +83,6 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_strict = False
|
_strict = False
|
||||||
_field_messages = "conversations"
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def strict(self):
|
def strict(self):
|
||||||
@@ -95,16 +92,8 @@ class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy):
|
|||||||
def strict(self, strict):
|
def strict(self, strict):
|
||||||
self._strict = strict
|
self._strict = strict
|
||||||
|
|
||||||
@property
|
|
||||||
def field_messages(self):
|
|
||||||
return self._strict
|
|
||||||
|
|
||||||
@field_messages.setter
|
|
||||||
def field_messages(self, field_messages):
|
|
||||||
self._field_messages = field_messages
|
|
||||||
|
|
||||||
def get_conversation_thread(self, prompt):
|
def get_conversation_thread(self, prompt):
|
||||||
conversations = prompt[self.field_messages]
|
conversations = prompt["conversations"]
|
||||||
if self.strict:
|
if self.strict:
|
||||||
return conversations
|
return conversations
|
||||||
role_key = "from"
|
role_key = "from"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import torch
|
|||||||
import transformers.modelcard
|
import transformers.modelcard
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from datasets import Dataset
|
from datasets import Dataset
|
||||||
from peft import PeftModel
|
from peft import PeftModel, PeftModelForCausalLM
|
||||||
from pkg_resources import get_distribution # type: ignore
|
from pkg_resources import get_distribution # type: ignore
|
||||||
from transformers import PreTrainedModel, PreTrainedTokenizer
|
from transformers import PreTrainedModel, PreTrainedTokenizer
|
||||||
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled
|
||||||
@@ -207,6 +207,20 @@ def train(
|
|||||||
|
|
||||||
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization)
|
||||||
|
|
||||||
|
if cfg.adapter and isinstance(model, (PeftModel, PeftModelForCausalLM)):
|
||||||
|
model.to("cpu")
|
||||||
|
model = model.merge_and_unload()
|
||||||
|
|
||||||
|
if cfg.local_rank == 0:
|
||||||
|
LOG.info(f"saving merged model to: {str(Path(cfg.output_dir) / 'merged')}")
|
||||||
|
model.save_pretrained(
|
||||||
|
str(Path(cfg.output_dir) / "merged"),
|
||||||
|
safe_serialization=safe_serialization,
|
||||||
|
progressbar=True,
|
||||||
|
)
|
||||||
|
tokenizer.save_pretrained(str(Path(cfg.output_dir) / "merged"))
|
||||||
|
|
||||||
|
|
||||||
if not cfg.hub_model_id:
|
if not cfg.hub_model_id:
|
||||||
try:
|
try:
|
||||||
trainer.create_model_card(model_name=cfg.output_dir.lstrip("./"))
|
trainer.create_model_card(model_name=cfg.output_dir.lstrip("./"))
|
||||||
|
|||||||
@@ -114,7 +114,9 @@ def prepare_dataset(cfg, tokenizer):
|
|||||||
total_eval_steps = calculate_total_num_steps(cfg, eval_dataset, update=False)
|
total_eval_steps = calculate_total_num_steps(cfg, eval_dataset, update=False)
|
||||||
if total_eval_steps == 0:
|
if total_eval_steps == 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"eval dataset split is too small for sample_packing. You should set `eval_sample_packing: False`. "
|
"eval dataset split is too small for sample_packing. "
|
||||||
|
"You should set `eval_sample_packing: False` "
|
||||||
|
"or decrease the value of `eval_batch_size`. "
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.max_steps:
|
if cfg.max_steps:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Multipack Batch Sampler
|
|||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
from typing import Any, Iterable, List, Union
|
from typing import Any, Iterable, List, Union, Optional
|
||||||
|
|
||||||
import numba
|
import numba
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -115,12 +115,14 @@ class MultipackBatchSampler(BatchSampler):
|
|||||||
batch_max_len: int,
|
batch_max_len: int,
|
||||||
lengths: np.ndarray,
|
lengths: np.ndarray,
|
||||||
packing_efficiency_estimate: float = 1.0,
|
packing_efficiency_estimate: float = 1.0,
|
||||||
|
consistent_length: Optional[bool] = False,
|
||||||
):
|
):
|
||||||
super().__init__(sampler, batch_size, drop_last)
|
super().__init__(sampler, batch_size, drop_last)
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
self.batch_max_len = batch_max_len
|
self.batch_max_len = batch_max_len
|
||||||
self.lengths: np.ndarray = lengths
|
self.lengths: np.ndarray = lengths
|
||||||
self.packing_efficiency_estimate = packing_efficiency_estimate or 1.0
|
self.packing_efficiency_estimate = packing_efficiency_estimate or 1.0
|
||||||
|
self.consistent_length = consistent_length
|
||||||
|
|
||||||
assert isinstance(self.lengths, np.ndarray)
|
assert isinstance(self.lengths, np.ndarray)
|
||||||
|
|
||||||
@@ -164,11 +166,18 @@ class MultipackBatchSampler(BatchSampler):
|
|||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
batches = self.generate_batches(set_stats=True)
|
batches = self.generate_batches(set_stats=True)
|
||||||
return iter(batches)
|
if self.consistent_length:
|
||||||
|
length = self._len_est()
|
||||||
|
return iter(batches[:length])
|
||||||
|
else:
|
||||||
|
return iter(batches)
|
||||||
|
|
||||||
def num_batches(self):
|
def num_batches(self):
|
||||||
batches = self.generate_batches(set_stats=True)
|
batches = self.generate_batches(set_stats=True)
|
||||||
return len(batches)
|
if self.consistent_length:
|
||||||
|
return self._len_est()
|
||||||
|
else:
|
||||||
|
return len(batches)
|
||||||
|
|
||||||
def efficiency(self):
|
def efficiency(self):
|
||||||
return self.eff_total_used / self.eff_total_slots
|
return self.eff_total_used / self.eff_total_slots
|
||||||
|
|||||||
@@ -277,7 +277,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True):
|
|||||||
calc_sample_packing_eff_est,
|
calc_sample_packing_eff_est,
|
||||||
)
|
)
|
||||||
sample_packing_eff_est = (
|
sample_packing_eff_est = (
|
||||||
math.ceil(sample_packing_actual_eff_all * 100.0) / 100.0
|
math.ceil(sample_packing_actual_eff_all * 10000.0) / 10000.0
|
||||||
)
|
)
|
||||||
if update:
|
if update:
|
||||||
cfg.sample_packing_eff_est = sample_packing_eff_est
|
cfg.sample_packing_eff_est = sample_packing_eff_est
|
||||||
|
|||||||
Reference in New Issue
Block a user