use hf dataset for mmlu evals

This commit is contained in:
Wing Lian
2023-08-19 18:57:46 -04:00
parent 9aed60fa54
commit c30120e684

View File

@@ -121,20 +121,20 @@ def mmlu_eval_callback_factory(trainer, tokenizer):
mmlu_split = "eval" mmlu_split = "eval"
if trainer.args.mmlu_dataset == "mmlu-zs": if trainer.args.mmlu_dataset == "mmlu-zs":
mmlu_dataset = load_dataset( mmlu_dataset = load_dataset(
"json", "openaccess-ai-collective/mmlu-evals",
data_files={ data_files={
"eval": "data/mmlu/zero_shot_mmlu_val.json", "eval": "zero_shot_mmlu_val.json",
"test": "data/mmlu/zero_shot_mmlu_test.json", "test": "zero_shot_mmlu_test.json",
}, },
) )
mmlu_dataset = mmlu_dataset.remove_columns("subject") mmlu_dataset = mmlu_dataset.remove_columns("subject")
# MMLU Five-shot (Eval/Test only) # MMLU Five-shot (Eval/Test only)
elif trainer.args.mmlu_dataset in ["mmlu", "mmlu-fs"]: elif trainer.args.mmlu_dataset in ["mmlu", "mmlu-fs"]:
mmlu_dataset = load_dataset( mmlu_dataset = load_dataset(
"json", "openaccess-ai-collective/mmlu-evals",
data_files={ data_files={
"eval": "data/mmlu/five_shot_mmlu_val.json", "eval": "five_shot_mmlu_val.json",
"test": "data/mmlu/five_shot_mmlu_test.json", "test": "five_shot_mmlu_test.json",
}, },
) )
# mmlu_dataset = mmlu_dataset.remove_columns('subject') # mmlu_dataset = mmlu_dataset.remove_columns('subject')