Use the Hugging Face dataset (openaccess-ai-collective/mmlu-evals) for MMLU evals instead of local JSON files under data/mmlu/
This commit is contained in:
@@ -121,20 +121,20 @@ def mmlu_eval_callback_factory(trainer, tokenizer):
|
|||||||
mmlu_split = "eval"
|
mmlu_split = "eval"
|
||||||
if trainer.args.mmlu_dataset == "mmlu-zs":
|
if trainer.args.mmlu_dataset == "mmlu-zs":
|
||||||
mmlu_dataset = load_dataset(
|
mmlu_dataset = load_dataset(
|
||||||
"json",
|
"openaccess-ai-collective/mmlu-evals",
|
||||||
data_files={
|
data_files={
|
||||||
"eval": "data/mmlu/zero_shot_mmlu_val.json",
|
"eval": "zero_shot_mmlu_val.json",
|
||||||
"test": "data/mmlu/zero_shot_mmlu_test.json",
|
"test": "zero_shot_mmlu_test.json",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
mmlu_dataset = mmlu_dataset.remove_columns("subject")
|
mmlu_dataset = mmlu_dataset.remove_columns("subject")
|
||||||
# MMLU Five-shot (Eval/Test only)
|
# MMLU Five-shot (Eval/Test only)
|
||||||
elif trainer.args.mmlu_dataset in ["mmlu", "mmlu-fs"]:
|
elif trainer.args.mmlu_dataset in ["mmlu", "mmlu-fs"]:
|
||||||
mmlu_dataset = load_dataset(
|
mmlu_dataset = load_dataset(
|
||||||
"json",
|
"openaccess-ai-collective/mmlu-evals",
|
||||||
data_files={
|
data_files={
|
||||||
"eval": "data/mmlu/five_shot_mmlu_val.json",
|
"eval": "five_shot_mmlu_val.json",
|
||||||
"test": "data/mmlu/five_shot_mmlu_test.json",
|
"test": "five_shot_mmlu_test.json",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
# mmlu_dataset = mmlu_dataset.remove_columns('subject')
|
# mmlu_dataset = mmlu_dataset.remove_columns('subject')
|
||||||
|
|||||||
Reference in New Issue
Block a user