From 94c00c1d041b0e21b694abc6ba5923a19e013d9c Mon Sep 17 00:00:00 2001
From: Dan Saunders
Date: Fri, 21 Mar 2025 11:23:39 -0400
Subject: [PATCH] pre-commit

---
 src/axolotl/utils/schemas/config.py   | 41 ++++++++++++++++-----------
 src/axolotl/utils/schemas/training.py | 12 ++++----
 2 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/src/axolotl/utils/schemas/config.py b/src/axolotl/utils/schemas/config.py
index 83be0288b..7676a50a8 100644
--- a/src/axolotl/utils/schemas/config.py
+++ b/src/axolotl/utils/schemas/config.py
@@ -1,4 +1,5 @@
 """Main Axolotl input configuration Pydantic models"""
+
 # pylint: disable=too-many-lines
 
 import logging
@@ -91,24 +92,30 @@ class AxolotlInputConfig(
     dpo_use_weighting: bool | None = None
     dpo_use_logits_to_keep: bool | None = None
 
-    datasets: Annotated[
-        list[SFTDataset | DPODataset | KTODataset | StepwiseSupervisedDataset],
-        MinLen(1),
-    ] | None = None
+    datasets: (
+        Annotated[
+            list[SFTDataset | DPODataset | KTODataset | StepwiseSupervisedDataset],
+            MinLen(1),
+        ]
+        | None
+    ) = None
 
-    test_datasets: Annotated[
-        list[SFTDataset | DPODataset | KTODataset | StepwiseSupervisedDataset],
-        MinLen(1),
-    ] | None = None
+    test_datasets: (
+        Annotated[
+            list[SFTDataset | DPODataset | KTODataset | StepwiseSupervisedDataset],
+            MinLen(1),
+        ]
+        | None
+    ) = None
     shuffle_merged_datasets: bool | None = True
     dataset_prepared_path: str | None = None
     dataset_shard_num: int | None = None
     dataset_shard_idx: int | None = None
     skip_prepare_dataset: bool | None = False
 
-    pretraining_dataset: Annotated[
-        list[PretrainingDataset | SFTDataset], MinLen(1)
-    ] | None = Field(
+    pretraining_dataset: (
+        Annotated[list[PretrainingDataset | SFTDataset], MinLen(1)] | None
+    ) = Field(
         default=None,
         json_schema_extra={"description": "streaming dataset to use for pretraining"},
     )
@@ -232,9 +239,9 @@ class AxolotlInputConfig(
     deepspeed: str | dict[str, Any] | None = None
     fsdp: list[str] | None = None
     fsdp_config: dict[str, Any] | None = None
-    fsdp_final_state_dict_type: Literal[
-        "FULL_STATE_DICT", "LOCAL_STATE_DICT", "SHARDED_STATE_DICT"
-    ] | None = None
+    fsdp_final_state_dict_type: (
+        Literal["FULL_STATE_DICT", "LOCAL_STATE_DICT", "SHARDED_STATE_DICT"] | None
+    ) = None
 
     val_set_size: float | None = Field(default=0.0)
 
@@ -244,9 +251,9 @@ class AxolotlInputConfig(
 
     torch_compile: Literal["auto"] | bool | None = None
     torch_compile_backend: str | None = None
-    torch_compile_mode: Literal[
-        "default", "reduce-overhead", "max-autotune"
-    ] | None = None
+    torch_compile_mode: Literal["default", "reduce-overhead", "max-autotune"] | None = (
+        None
+    )
 
     max_steps: int | None = None
     warmup_steps: int | None = None
diff --git a/src/axolotl/utils/schemas/training.py b/src/axolotl/utils/schemas/training.py
index 6977f2c74..2ab4b4286 100644
--- a/src/axolotl/utils/schemas/training.py
+++ b/src/axolotl/utils/schemas/training.py
@@ -50,9 +50,9 @@ class HyperparametersConfig(BaseModel):
     embedding_lr: float | None = None
     embedding_lr_scale: float | None = None
     weight_decay: float | None = 0.0
-    optimizer: (
-        OptimizerNames | CustomSupportedOptimizers
-    ) | None = OptimizerNames.ADAMW_TORCH_FUSED
+    optimizer: (OptimizerNames | CustomSupportedOptimizers) | None = (
+        OptimizerNames.ADAMW_TORCH_FUSED
+    )
     optim_args: (str | dict[str, Any]) | None = Field(
         default=None,
         json_schema_extra={"description": "Optional arguments to supply to optimizer."},
@@ -64,9 +64,9 @@ class HyperparametersConfig(BaseModel):
         },
     )
     torchdistx_path: str | None = None
-    lr_scheduler: (
-        SchedulerType | Literal["one_cycle"] | Literal["rex"]
-    ) | None = SchedulerType.COSINE
+    lr_scheduler: (SchedulerType | Literal["one_cycle"] | Literal["rex"]) | None = (
+        SchedulerType.COSINE
+    )
     lr_scheduler_kwargs: dict[str, Any] | None = None
     lr_quadratic_warmup: bool | None = None
     cosine_min_lr_ratio: float | None = None
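
For reference, every hunk above applies the same mechanical change: newer Black
releases wrap a long union annotation in parentheses so that the `| None` member
and the assigned default remain grouped as a single expression after
line-wrapping. A minimal sketch of the resulting layout, assuming pydantic v2
and annotated-types, on a hypothetical `ExampleConfig` model (not part of the
axolotl schemas):

    from typing import Annotated, Any, Literal

    from annotated_types import MinLen
    from pydantic import BaseModel, Field


    class ExampleConfig(BaseModel):
        """Hypothetical model illustrating the parenthesized-union layout."""

        # Long Annotated[...] unions: the whole union is wrapped in
        # parentheses, so the annotation ends in ") = None" rather than
        # a dangling "] | None = None".
        items: (
            Annotated[list[str], MinLen(1)]
            | None
        ) = None

        # Unions that fit inline but overflow at the assignment: only the
        # default value is parenthesized onto its own line.
        mode: Literal["default", "reduce-overhead", "max-autotune"] | None = (
            None
        )

        # Field(...) defaults are untouched; only the annotation is regrouped.
        extra: (str | dict[str, Any]) | None = Field(
            default=None,
            json_schema_extra={"description": "Optional extra arguments."},
        )

The regrouping is purely syntactic: the annotations evaluate to the same types
before and after, so the patch should be behavior-neutral.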