we don't need to call check_dataset_labels when skip_prepare_dataset is set (#2962)

* we don't need to call check_dataset_labels when skip_prepare_dataset is set

* Fix the actual bug (`cfg.get("key")` looked up the literal string "key" instead of the loop variable `key`) and revert the prior fix

* warn and early return instead of raising an error

* use `LOG.error` instead of a warning
This commit is contained in:
Wing Lian
2025-07-22 10:00:53 -04:00
committed by GitHub
parent 01d8175d48
commit b86a1d47b0

View File

@@ -37,10 +37,11 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
check_user_token() check_user_token()
for key in ["skip_prepare_dataset", "pretraining_dataset"]: for key in ["skip_prepare_dataset", "pretraining_dataset"]:
if cfg.get("key"): if cfg.get(key):
raise ValueError( LOG.error(
f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead." f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead."
) )
return
if not cfg.dataset_prepared_path: if not cfg.dataset_prepared_path:
msg = ( msg = (