data_parallel_size in VllmServeCliArgs (#3074)
* data_parallel_size in VllmServeCliArgs * moved to 43
This commit is contained in:
@@ -40,6 +40,12 @@ class VllmServeCliArgs:
|
|||||||
default=None,
|
default=None,
|
||||||
metadata={"help": "Number of tensor parallel workers to use."},
|
metadata={"help": "Number of tensor parallel workers to use."},
|
||||||
)
|
)
|
||||||
|
data_parallel_size: Optional[int] = field(
|
||||||
|
default=None,
|
||||||
|
metadata={
|
||||||
|
"help": "Number of data parallel workers to use for vLLM serving. This controls how many model replicas are used for parallel inference."
|
||||||
|
},
|
||||||
|
)
|
||||||
host: Optional[str] = field(
|
host: Optional[str] = field(
|
||||||
default=None, # nosec B104
|
default=None, # nosec B104
|
||||||
metadata={"help": "Host address to run the server on."},
|
metadata={"help": "Host address to run the server on."},
|
||||||
|
|||||||
Reference in New Issue
Block a user