Compare commits

4 Commits

grouped_lr ... optimizer-

| Author | SHA1 | Date |
|---|---|---|
| | c7b095d77f | |
| | 7a38dbe674 | |
| | e0a2eb2ebd | |
| | d852d7af7a | |
@@ -61,4 +61,4 @@ antlr4-python3-runtime==4.13.2
 torchao==0.7.0
 schedulefree==1.3.0
 
-axolotl-contribs-lgpl==0.0.1b2
+axolotl-contribs-lgpl==0.0.2
@@ -93,7 +93,7 @@ def evaluate(config: str, accelerate: bool, **kwargs):
 @click.argument("config", type=click.Path(exists=True, path_type=str))
 @click.option(
     "--accelerate/--no-accelerate",
-    default=True,
+    default=False,
     help="Use accelerate launch for multi-GPU inference",
 )
 @click.option(
@@ -124,7 +124,7 @@ def inference(
     if lora_model_dir:
         kwargs["lora_model_dir"] = lora_model_dir
     if base_model:
-        kwargs["output_dir"] = base_model
+        kwargs["base_model"] = base_model
 
     if accelerate:
         base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.inference"]
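With `default=False`, the `--accelerate` flag now has to be passed explicitly before the CLI wraps the run in `accelerate launch`; otherwise inference runs in-process. A rough sketch of the dispatch this flag toggles, where the config path is illustrative and the in-process branch is only a stand-in for the CLI's direct call path, not the repo's actual code:

    import subprocess

    def launch_inference(config: str, accelerate: bool = False) -> None:
        # Sketch only: the accelerate branch mirrors the diff above; the other
        # branch stands in for invoking the inference entrypoint in-process.
        if accelerate:
            base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.inference"]
            subprocess.run(base_cmd + [config], check=True)
        else:
            print(f"running in-process inference with config: {config}")

    launch_inference("examples/lora.yml")  # illustrative config path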
@@ -68,7 +68,7 @@ from axolotl.utils.callbacks import (
 )
 from axolotl.utils.callbacks.lisa import lisa_callback_factory
 from axolotl.utils.callbacks.profiler import PytorchProfilerCallback
-from axolotl.utils.chat_templates import get_chat_template
+from axolotl.utils.chat_templates import get_chat_template_from_config
 from axolotl.utils.collators import (
     BatchSamplerDataCollatorForSeq2Seq,
     DataCollatorForSeq2Seq,
@@ -424,6 +424,11 @@ class SchedulerMixin(Trainer):
 
         return self.lr_scheduler
 
+    def _load_optimizer_and_scheduler(self, checkpoint):
+        if not checkpoint and self.args.optimizer_checkpoint is not None:
+            checkpoint = self.args.optimizer_checkpoint
+        return super()._load_optimizer_and_scheduler(checkpoint)
+
 
 class AxolotlTrainer(SchedulerMixin, Trainer):
     """
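The new `_load_optimizer_and_scheduler` override lets a run that is not resuming from a full training checkpoint still warm-start optimizer and LR-scheduler state from a separate `optimizer_checkpoint` directory, while a real resume keeps taking precedence. A minimal sketch of the same fallback outside the Trainer, assuming the directory was written by HF `Trainer` (which stores `optimizer.pt` and `scheduler.pt`); the paths are illustrative:

    import os
    import torch

    def warm_start(optimizer, scheduler, resume_checkpoint=None, optimizer_checkpoint=None):
        # Mirror the mixin: only fall back to optimizer_checkpoint when no
        # resume checkpoint is given.
        checkpoint = resume_checkpoint or optimizer_checkpoint
        if checkpoint is None:
            return
        optimizer.load_state_dict(
            torch.load(os.path.join(checkpoint, "optimizer.pt"), map_location="cpu")
        )
        scheduler.load_state_dict(
            torch.load(os.path.join(checkpoint, "scheduler.pt"), map_location="cpu")
        )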
@@ -1764,6 +1769,10 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
             ] = self.cfg.loraplus_lr_embedding
         training_arguments_kwargs["embedding_lr"] = self.cfg.embedding_lr
         training_arguments_kwargs["embedding_lr_scale"] = self.cfg.embedding_lr_scale
+        if self.cfg.optimizer_checkpoint:
+            training_arguments_kwargs[
+                "optimizer_checkpoint"
+            ] = self.cfg.optimizer_checkpoint
 
         if self.cfg.lr_scheduler in ["one_cycle", "log_sweep"]:
             training_arguments_kwargs["lr_scheduler_type"] = "cosine"
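For the new `optimizer_checkpoint` entry to survive construction of the training arguments, the project's own training-arguments dataclass has to declare a matching field. A hedged sketch of what such a declaration looks like; the field name comes from the diff, but this subclass and its help text are illustrative, not the repo's actual arguments class:

    from dataclasses import dataclass, field
    from typing import Optional

    from transformers import TrainingArguments

    @dataclass
    class TrainingArgumentsWithOptimizerCheckpoint(TrainingArguments):
        # Illustrative subclass; only the field name is taken from the diff.
        optimizer_checkpoint: Optional[str] = field(
            default=None,
            metadata={"help": "checkpoint dir to load optimizer/scheduler state from"},
        )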
@@ -1834,8 +1843,8 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
         training_arguments_kwargs["model_type"] = self.cfg.model_config_type
         training_arguments_kwargs["pretraining"] = bool(self.cfg.pretraining_dataset)
         if self.cfg.chat_template:
-            training_arguments_kwargs["chat_template"] = get_chat_template(
-                self.cfg.chat_template,
+            training_arguments_kwargs["chat_template"] = get_chat_template_from_config(
+                cfg=self.cfg,
                 tokenizer=self.tokenizer,
             )
 
@@ -1,5 +1,6 @@
 """Prepare and train a model on a dataset. Can also infer from a model or merge lora"""
 
+import inspect
 import os
 import signal
 import sys
@@ -126,6 +127,19 @@ def train(
     )
 
     if cfg.fix_untrained_tokens:
+        # check if the `token_ids_to_fix` kwarg exists in the fix_untrained_tokens args
+        sig = inspect.signature(fix_untrained_tokens)
+        # if the function has the `token_ids_to_fix` arg, and fix_untrained_tokens is a list
+        if "token_ids_to_fix" in sig.parameters and isinstance(
+            cfg.fix_untrained_tokens, list
+        ):
+            fix_untrained_tokens(
+                model,
+                tokenizer,
+                train_dataset,
+                token_ids_to_fix=cfg.fix_untrained_tokens,
+            )
+        else:
             fix_untrained_tokens(model, tokenizer, train_dataset)
         if cfg.local_rank == 0:
             model.save_pretrained(
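The `inspect.signature` check is a compatibility shim: the `token_ids_to_fix` keyword is only forwarded when the installed `fix_untrained_tokens` helper actually declares it, so older helper versions keep working with the plain three-argument call. The same pattern in isolation, with the stub helper and token IDs purely illustrative:

    import inspect

    def fix_untrained_tokens(model, tokenizer, dataset, token_ids_to_fix=None):
        # Stand-in for the real helper; only its signature matters here.
        return token_ids_to_fix

    def call_with_optional_kwarg(func, *args, name, value):
        # Forward the keyword only if the callee declares it, mirroring train().
        if name in inspect.signature(func).parameters:
            return func(*args, **{name: value})
        return func(*args)

    print(call_with_optional_kwarg(
        fix_untrained_tokens, "model", "tokenizer", "dataset",
        name="token_ids_to_fix", value=[128001, 128009],  # illustrative ids
    ))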
@@ -603,6 +603,8 @@ class AxolotlInputConfig(
     strict: Optional[bool] = Field(default=False)
     resume_from_checkpoint: Optional[str] = None
     auto_resume_from_checkpoints: Optional[bool] = None
+    optimizer_checkpoint: Optional[str] = None
+
     resize_token_embeddings_to_32x: Optional[bool] = None
     mean_resizing_embeddings: Optional[bool] = False
 
@@ -794,7 +796,7 @@ class AxolotlInputConfig(
     chat_template_jinja: Optional[str] = None
     default_system_message: Optional[str] = None
 
-    fix_untrained_tokens: Optional[bool] = None
+    fix_untrained_tokens: Optional[Union[int, List[int]]] = None
 
     # INTERNALS - document for now, generally not set externally
     is_preprocess: Optional[bool] = None
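Widening `fix_untrained_tokens` from a boolean switch to `Union[int, List[int]]` lets the config name the exact token IDs whose embeddings should be repaired, which is what the `token_ids_to_fix` branch in `train()` consumes. A minimal standalone pydantic sketch of the field; the wrapper model and token IDs are illustrative:

    from typing import List, Optional, Union

    from pydantic import BaseModel

    class TokenFixConfig(BaseModel):
        # Accepts a single token id or an explicit list of ids to repair.
        fix_untrained_tokens: Optional[Union[int, List[int]]] = None

    print(TokenFixConfig(fix_untrained_tokens=[128001, 128009]).fix_untrained_tokens)
    print(TokenFixConfig(fix_untrained_tokens=128255).fix_untrained_tokens)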