we don't need to call check_dataset_labels when skip_prepare_dataset is set (#2962)

* we don't need to call check_dataset_labels when skip_prepare_dataset is set

* Fix actual bug and revert prior fix

* warn and early return instead of raising an error

* use error
This commit is contained in:
Wing Lian
2025-07-22 10:00:53 -04:00
committed by GitHub
parent 01d8175d48
commit b86a1d47b0

View File

@@ -37,10 +37,11 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
check_user_token()
for key in ["skip_prepare_dataset", "pretraining_dataset"]:
-if cfg.get("key"):
-raise ValueError(
+if cfg.get(key):
+LOG.error(
f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead."
)
+return
if not cfg.dataset_prepared_path:
msg = (