better handling when no subjects
This commit is contained in:
@@ -129,9 +129,11 @@ def bench_eval_callback_factory(trainer, tokenizer):
|
|||||||
def transform_subject(example):
|
def transform_subject(example):
|
||||||
# Split on ':' and trim whitespace
|
# Split on ':' and trim whitespace
|
||||||
parts = example["subject"].split(":")
|
parts = example["subject"].split(":")
|
||||||
first_part = parts[0].strip().lower() # Lowercase the first part
|
first_part = (
|
||||||
|
parts[0].strip().lower().replace("-", "_")
|
||||||
|
) # Lowercase the first part and replace hyphens with underscores
|
||||||
second_part = (
|
second_part = (
|
||||||
parts[1].strip().replace("-", "_")
|
parts[1].strip().replace("-", "_") if len(parts) > 1 else "all"
|
||||||
) # Replace hyphens with underscores
|
) # Replace hyphens with underscores; fall back to "all" when there is no second part
|
||||||
|
|
||||||
# Return the transformed values
|
# Return the transformed values
|
||||||
|
|||||||
Reference in New Issue
Block a user