fix: handle sharegpt dataset missing (#2035)
* fix: handle sharegpt dataset missing
* fix: explanation
* feat: add test
This commit is contained in:
@@ -789,7 +789,12 @@ class AxolotlInputConfig(
|
||||
if not ds_cfg.get("type"):
|
||||
continue
|
||||
|
||||
if ds_cfg["type"].startswith("sharegpt"):
|
||||
ds_type = ds_cfg["type"]
|
||||
# skip if it's a dict (for custom user instruction prompt)
|
||||
if isinstance(ds_type, dict):
|
||||
continue
|
||||
|
||||
if isinstance(ds_type, str) and ds_type.startswith("sharegpt"):
|
||||
raise ValueError(
|
||||
"`type: sharegpt.*` is deprecated. Please use `type: chat_template` instead."
|
||||
)
|
||||
|
||||
@@ -234,3 +234,59 @@ class TestValidationCheckDatasetConfig(BaseValidation):
|
||||
)
|
||||
|
||||
_check_config()
|
||||
|
||||
def test_dataset_sharegpt_deprecation(self, minimal_cfg):
    """Exercise the ``sharegpt`` dataset-type deprecation check.

    Three configurations are validated in turn:

    1. a dataset whose ``type`` is the string ``"sharegpt"`` must make
       ``validate_config`` raise ``ValueError`` with the deprecation message;
    2. a dataset whose ``type`` is a dict (custom prompt description) must
       validate without raising;
    3. a dataset whose ``type`` is another string (``"alpaca"``) must
       validate without raising.
    """
    # Case 1: string type starting with "sharegpt" -> deprecation error.
    sharegpt_dataset = {
        "path": "LDJnr/Puffin",
        "type": "sharegpt",
        "conversation": "chatml",
    }
    sharegpt_cfg = DictDefault(
        minimal_cfg
        | {
            "chat_template": "chatml",
            "datasets": [sharegpt_dataset],
        }
    )
    with pytest.raises(ValueError, match=r".*type: sharegpt.*` is deprecated.*"):
        validate_config(sharegpt_cfg)

    # Case 2: dict-valued type must not trigger the deprecation error.
    custom_prompt_type = {
        "field_instruction": "instruction",
        "field_output": "output",
        "field_system": "system",
        "format": "<|user|> {instruction} {input} <|model|>",
        "no_input_format": "<|user|> {instruction} <|model|>",
        "system_prompt": "",
    }
    dict_type_cfg = DictDefault(
        minimal_cfg
        | {
            "datasets": [
                {
                    "path": "mhenrichsen/alpaca_2k_test",
                    "type": custom_prompt_type,
                }
            ],
        }
    )
    validate_config(dict_type_cfg)

    # Case 3: a non-sharegpt string type must not trigger it either.
    alpaca_cfg = DictDefault(
        minimal_cfg
        | {
            "datasets": [
                {
                    "path": "mhenrichsen/alpaca_2k_test",
                    "type": "alpaca",
                }
            ],
        }
    )
    validate_config(alpaca_cfg)
|
||||
|
||||
Reference in New Issue
Block a user