From aefd4d74fa5fb3df85cad4a2146551f9a52b5d01 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 21 Aug 2023 17:22:06 -0400 Subject: [PATCH] better handling when no subjects --- src/axolotl/utils/callbacks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/axolotl/utils/callbacks.py b/src/axolotl/utils/callbacks.py index 2f632a294..9ea949a7c 100644 --- a/src/axolotl/utils/callbacks.py +++ b/src/axolotl/utils/callbacks.py @@ -129,9 +129,11 @@ def bench_eval_callback_factory(trainer, tokenizer): def transform_subject(example): # Split on ':' and trim whitespace parts = example["subject"].split(":") - first_part = parts[0].strip().lower() # Lowercase the first part + first_part = ( + parts[0].strip().lower().replace("-", "_") + ) # Lowercase the first part and replace hyphens with underscores second_part = ( - parts[1].strip().replace("-", "_") + parts[1].strip().replace("-", "_") if len(parts) > 1 else "all" ) # Replace hyphens with underscores # Return the transformed values