chore: cleanup deprecated config elements (#2309)

* feat: update metadata fields in RayConfig and refactor config class in AxolotlInputConfig

- Replace `metadata` fields with `json_schema_extra` in the `RayConfig` class.
- Replace the `Config` class with `ConfigDict` in `AxolotlInputConfig`.
- Set `populate_by_name` to `True` directly in the `ConfigDict` instance.

* feat: update AxolotlInputConfig in utils

- Replace `conlist` with `Annotated[list[...], MinLen(1)]` for the `datasets`, `test_datasets`, and `pretraining_dataset` fields
- Change the default values of the `lr_scheduler` and `optimizer` fields in the `HyperparametersConfig` class to enum members (`SchedulerType.COSINE`, `OptimizerNames.ADAMW_HF`) instead of their string values
- Remove the unnecessary `Union` from the `evals_per_epoch` field in the `AxolotlInputConfig` class
- Import `MinLen` from the `annotated_types` module
- Remove the import of `conlist` from the `pydantic` module

* feat: update ModelInputConfig and AxolotlInputConfig in v0_4_1

- Remove the `ConfigDict` import from pydantic in `src/axolotl/utils/config/models/input/v0_4_1/__init__.py`
- Add `model_config` with `protected_namespaces` to `ModelInputConfig`
- Replace `config: ConfigDict` with `model_config` in `AxolotlInputConfig`
- Set `populate_by_name` to `True` in `model_config` for `AxolotlInputConfig`

* chore: get rid of unused import
This commit is contained in:
NJordan72
2025-02-18 03:39:24 -05:00
committed by GitHub
parent b194e17c28
commit 91bb95685a

View File

@@ -6,12 +6,12 @@ import os
from enum import Enum from enum import Enum
from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Union from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Union
from annotated_types import MinLen
from packaging import version from packaging import version
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
Field, Field,
StringConstraints, StringConstraints,
conlist,
field_serializer, field_serializer,
field_validator, field_validator,
model_validator, model_validator,
@@ -435,6 +435,8 @@ class ReLoRAConfig(BaseModel):
class ModelInputConfig(BaseModel): class ModelInputConfig(BaseModel):
"""model to train on configuration subset""" """model to train on configuration subset"""
model_config = {"protected_namespaces": ()}
base_model: str base_model: str
base_model_config: Optional[str] = None base_model_config: Optional[str] = None
cls_model_config: Optional[str] = None cls_model_config: Optional[str] = None
@@ -501,7 +503,7 @@ class HyperparametersConfig(BaseModel):
"adopt_adamw", "adopt_adamw",
], ],
] ]
] = OptimizerNames.ADAMW_HF.value ] = OptimizerNames.ADAMW_HF
optim_args: Optional[Union[str, Dict[str, Any]]] = Field( optim_args: Optional[Union[str, Dict[str, Any]]] = Field(
default=None, default=None,
json_schema_extra={"description": "Optional arguments to supply to optimizer."}, json_schema_extra={"description": "Optional arguments to supply to optimizer."},
@@ -513,7 +515,9 @@ class HyperparametersConfig(BaseModel):
}, },
) )
torchdistx_path: Optional[str] = None torchdistx_path: Optional[str] = None
lr_scheduler: Optional[Union[SchedulerType, Literal["one_cycle"]]] = "cosine" lr_scheduler: Optional[
Union[SchedulerType, Literal["one_cycle"]]
] = SchedulerType.COSINE
lr_scheduler_kwargs: Optional[Dict[str, Any]] = None lr_scheduler_kwargs: Optional[Dict[str, Any]] = None
lr_quadratic_warmup: Optional[bool] = None lr_quadratic_warmup: Optional[bool] = None
cosine_min_lr_ratio: Optional[float] = None cosine_min_lr_ratio: Optional[float] = None
@@ -637,19 +641,19 @@ class RayConfig(BaseModel):
use_ray: bool = Field(default=False) use_ray: bool = Field(default=False)
ray_run_name: Optional[str] = Field( ray_run_name: Optional[str] = Field(
default=None, default=None,
metadata={ json_schema_extra={
"help": "The training results will be saved at `saves/ray_run_name`." "help": "The training results will be saved at `saves/ray_run_name`."
}, },
) )
ray_num_workers: int = Field( ray_num_workers: int = Field(
default=1, default=1,
metadata={ json_schema_extra={
"help": "The number of workers for Ray training. Default is 1 worker." "help": "The number of workers for Ray training. Default is 1 worker."
}, },
) )
resources_per_worker: dict = Field( resources_per_worker: dict = Field(
default_factory=lambda: {"GPU": 1}, default_factory=lambda: {"GPU": 1},
metadata={ json_schema_extra={
"help": "The resources per worker for Ray training. Default is to use 1 GPU per worker." "help": "The resources per worker for Ray training. Default is to use 1 GPU per worker."
}, },
) )
@@ -674,10 +678,7 @@ class AxolotlInputConfig(
): ):
"""wrapper of all config options""" """wrapper of all config options"""
class Config: model_config = {"populate_by_name": True}
"""Config for alias"""
populate_by_name = True
strict: Optional[bool] = Field(default=False) strict: Optional[bool] = Field(default=False)
resume_from_checkpoint: Optional[str] = None resume_from_checkpoint: Optional[str] = None
@@ -699,15 +700,28 @@ class AxolotlInputConfig(
] = None # whether to use weighting in DPO trainer. If none, default is false in the trainer. ] = None # whether to use weighting in DPO trainer. If none, default is false in the trainer.
dpo_use_logits_to_keep: Optional[bool] = None dpo_use_logits_to_keep: Optional[bool] = None
datasets: Optional[conlist(DatasetConfig, min_length=1)] = None # type: ignore datasets: Optional[
test_datasets: Optional[conlist(DatasetConfig, min_length=1)] = None # type: ignore Annotated[
list[Union[SFTDataset, DPODataset, KTODataset, StepwiseSupervisedDataset]],
MinLen(1),
]
] = None
test_datasets: Optional[
Annotated[
list[Union[SFTDataset, DPODataset, KTODataset, StepwiseSupervisedDataset]],
MinLen(1),
]
] = None
shuffle_merged_datasets: Optional[bool] = True shuffle_merged_datasets: Optional[bool] = True
dataset_prepared_path: Optional[str] = None dataset_prepared_path: Optional[str] = None
dataset_shard_num: Optional[int] = None dataset_shard_num: Optional[int] = None
dataset_shard_idx: Optional[int] = None dataset_shard_idx: Optional[int] = None
skip_prepare_dataset: Optional[bool] = False skip_prepare_dataset: Optional[bool] = False
pretraining_dataset: Optional[conlist(Union[PretrainingDataset, SFTDataset], min_length=1)] = Field( # type: ignore pretraining_dataset: Optional[
Annotated[list[Union[PretrainingDataset, SFTDataset]], MinLen(1)]
] = Field(
default=None, default=None,
json_schema_extra={"description": "streaming dataset to use for pretraining"}, json_schema_extra={"description": "streaming dataset to use for pretraining"},
) )
@@ -850,7 +864,7 @@ class AxolotlInputConfig(
warmup_steps: Optional[int] = None warmup_steps: Optional[int] = None
warmup_ratio: Optional[float] = None warmup_ratio: Optional[float] = None
eval_steps: Optional[Union[int, float]] = None eval_steps: Optional[Union[int, float]] = None
evals_per_epoch: Optional[Union[int]] = None evals_per_epoch: Optional[int] = None
eval_strategy: Optional[str] = None eval_strategy: Optional[str] = None
save_steps: Optional[Union[int, float]] = None save_steps: Optional[Union[int, float]] = None
saves_per_epoch: Optional[int] = None saves_per_epoch: Optional[int] = None