Add deprecation warning for preprocess --iterable
This commit is contained in:
@@ -13,6 +13,16 @@ class PreprocessCliArgs:
|
|||||||
debug_num_examples: int = field(default=1)
|
debug_num_examples: int = field(default=1)
|
||||||
prompter: Optional[str] = field(default=None)
|
prompter: Optional[str] = field(default=None)
|
||||||
download: Optional[bool] = field(default=True)
|
download: Optional[bool] = field(default=True)
|
||||||
|
iterable: Optional[bool] = field(
|
||||||
|
default=False,
|
||||||
|
metadata={
|
||||||
|
"help": (
|
||||||
|
"[DEPRECATED] No longer supported. For streaming datasets, use "
|
||||||
|
"'axolotl train' and set 'streaming: true' in your YAML config, or "
|
||||||
|
"pass --streaming instead in the CLI."
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -35,10 +35,20 @@ def do_preprocess(cfg: DictDefault, cli_args: PreprocessCliArgs) -> None:
|
|||||||
check_accelerate_default_config()
|
check_accelerate_default_config()
|
||||||
check_user_token()
|
check_user_token()
|
||||||
|
|
||||||
|
if cli_args.iterable:
|
||||||
|
LOG.error(
|
||||||
|
"The --iterable CLI argument for 'axolotl preprocess' is no longer "
|
||||||
|
"supported. For training, set 'streaming: true' in your YAML config or "
|
||||||
|
"pass '--streaming' in your 'axolotl train' command for on-the-fly "
|
||||||
|
"preprocessing."
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
for key in ["skip_prepare_dataset", "pretraining_dataset"]:
|
for key in ["skip_prepare_dataset", "pretraining_dataset"]:
|
||||||
if cfg.get(key):
|
if cfg.get(key):
|
||||||
LOG.error(
|
LOG.error(
|
||||||
f"You have set `{key}:`. `preprocess` is not needed. Run the `axolotl train` CLI directly instead."
|
f"You have set `{key}:`. `preprocess` is not needed. Run the 'axolotl "
|
||||||
|
"train' CLI directly instead."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -1441,7 +1441,8 @@ class StreamingValidationMixin:
|
|||||||
val_set_size = getattr(self, "val_set_size", 0.0)
|
val_set_size = getattr(self, "val_set_size", 0.0)
|
||||||
if val_set_size and val_set_size > 0:
|
if val_set_size and val_set_size > 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Validation splits not supported for streaming datasets, skipping"
|
"Validation splits not supported for streaming datasets, please "
|
||||||
|
"use test_datasets: ... instead"
|
||||||
)
|
)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|||||||
Reference in New Issue
Block a user