feat: add support for dataset_num_processes (#3129) [skip ci]

* feat: add support for dataset_num_processes

* chore

* required changes

* requested changes

* required changes

* required changes

* required changes

* elif get_default_process_count()

* add:del data

* Update cicd/Dockerfile.jinja

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>

* Update cicd/single_gpu.py

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>

---------

Co-authored-by: salman <salman.mohammadi@outlook.com>
Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>
This commit is contained in:
VED
2025-10-13 15:48:12 +05:30
committed by GitHub
parent 143dea4753
commit cd856b45b1
18 changed files with 57 additions and 34 deletions

View File

@@ -141,7 +141,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -180,7 +180,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -219,7 +219,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -252,7 +252,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -285,7 +285,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -370,7 +370,7 @@ class TestDatasetPreparation:
"rl": "dpo",
"chat_template": "llama3",
"datasets": [ALPACA_MESSAGES_CONFIG_REVISION],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)
@@ -471,7 +471,7 @@ class TestDatasetPreparation:
"type": "alpaca",
},
],
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)