fix: hijack tokenizer temporarily [skip ci]
This commit is contained in:
@@ -122,6 +122,42 @@ def modify_tokenizer_files(
|
|||||||
def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
|
def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
|
||||||
"""Load and configure the tokenizer based on the provided config."""
|
"""Load and configure the tokenizer based on the provided config."""
|
||||||
|
|
||||||
|
# if self.cfg.model_config_type == "kimi_linear":
|
||||||
|
tokenizer_for_class_loading = AutoTokenizer.from_pretrained(
|
||||||
|
cfg.tokenizer_config, trust_remote_code=True
|
||||||
|
)
|
||||||
|
tokenizer_class = tokenizer_for_class_loading.__class__
|
||||||
|
del tokenizer_for_class_loading
|
||||||
|
|
||||||
|
def patched_apply_chat_template(
    self,
    conversation,
    tools=None,
    tokenize: bool = True,
    add_generation_prompt: bool = False,
    **kwargs,
):
    """Replacement for ``apply_chat_template`` on the loaded tokenizer class.

    Differs from a plain pass-through only in its defaults:
    ``tokenize`` defaults to ``True`` and ``add_generation_prompt``
    defaults to ``False``. ``tools`` is forwarded untouched (no
    ``deep_sort_dict`` pre-processing is applied here).

    All arguments, including any extra ``**kwargs``, are handed to the
    ``apply_chat_template`` implementation that follows ``tokenizer_class``
    in the instance's method resolution order, and its result is returned
    as-is.
    """
    # Resolve the next implementation after `tokenizer_class` explicitly;
    # the zero-argument form of super() is unavailable because this
    # function is defined outside of a class body.
    parent_apply_chat_template = super(tokenizer_class, self).apply_chat_template
    return parent_apply_chat_template(
        conversation=conversation,
        tools=tools,
        tokenize=tokenize,
        add_generation_prompt=add_generation_prompt,
        **kwargs,
    )
|
||||||
|
|
||||||
|
tokenizer_class.apply_chat_template = patched_apply_chat_template
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"Successfully patched 'apply_chat_template' on class '{tokenizer_class.__name__}' "
|
||||||
|
"with new defaults (tokenize=True, add_generation_prompt=False)."
|
||||||
|
)
|
||||||
|
|
||||||
def _load_mistral_common_tokenizer(cfg: DictDefault):
|
def _load_mistral_common_tokenizer(cfg: DictDefault):
|
||||||
"""Load mistral-common tokenizer"""
|
"""Load mistral-common tokenizer"""
|
||||||
from axolotl.utils.mistral import HFMistralTokenizer
|
from axolotl.utils.mistral import HFMistralTokenizer
|
||||||
|
|||||||
Reference in New Issue
Block a user