Make sure to register the base ChatML template even if no system message is provided (#1207)

This commit is contained in:
Wing Lian
2024-01-25 10:38:08 -05:00
committed by GitHub
parent a01b998c0f
commit badda3783b
4 changed files with 11 additions and 1 deletion

View File

@@ -106,3 +106,7 @@ jobs:
- name: GPU Unit Tests monkeypatched w docker image
run: |
docker run --privileged --gpus "all" --env WANDB_DISABLED=true --rm ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} pytest /workspace/axolotl/tests/e2e/patched/
- name: Prune image from docker
if: github.ref != 'refs/heads/main'
run: |
docker rmi -f ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}

View File

@@ -40,6 +40,8 @@ def do_cli(config: Path = Path("examples/"), **kwargs):
f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}"
)
register_chatml_template(parsed_cfg.default_system_message)
else:
register_chatml_template()
if not parsed_cfg.dataset_prepared_path:
msg = (

View File

@@ -43,7 +43,10 @@ def do_train(cfg, cli_args) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
f"ChatML set. Adding default system message: {cfg.default_system_message}"
)
register_chatml_template(cfg.default_system_message)
else:
register_chatml_template()
if cfg.rl:
dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args)
else:
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)

View File

@@ -16,6 +16,7 @@ from datasets import (
load_from_disk,
)
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import HFValidationError
from torch.utils.data import RandomSampler
from transformers import PreTrainedTokenizerBase
@@ -213,7 +214,7 @@ def load_tokenized_prepared_datasets(
token=use_auth_token,
)
ds_from_hub = True
except (FileNotFoundError, ConnectionError):
except (FileNotFoundError, ConnectionError, HFValidationError):
pass
ds_from_cloud = False