fix tokenizer overrides with gemma3 (#2488)

* fix tokenizer overrides with gemma3

* fix offline wrapping
This commit is contained in:
Wing Lian
2025-04-05 01:25:44 -04:00
committed by GitHub
parent de451f99a5
commit 949471039f
4 changed files with 48 additions and 4 deletions

View File

@@ -283,6 +283,13 @@ def modify_tokenizer_files(
raise ValueError(
f"Token ID {token_id} not found in added_tokens"
)
if "model" in tokenizer_data and "vocab" in tokenizer_data["model"]:
for token_id, new_value in token_id_mappings.items():
for entry_val, entry_id in tokenizer_data["model"]["vocab"].items():
if entry_id == token_id:
del tokenizer_data["model"]["vocab"][entry_val]
tokenizer_data["model"]["vocab"][new_value] = token_id
break
# Write the updated tokenizer data back
with open(tokenizer_path, "w", encoding="utf-8") as f: