qat doc updates (#3162) [skip-ci]
This commit is contained in:
@@ -22,8 +22,8 @@ Quantization is configured using the `quantization` key in your configuration fi
|
||||
```yaml
|
||||
base_model: # The path to the model to quantize.
|
||||
quantization:
|
||||
weight_dtype: # Optional[str] = "int8". Fake quantization layout to use for weight quantization. Valid options are uintX for X in [1, 2, 3, 4, 5, 6, 7], or int4, or int8
|
||||
activation_dtype: # Optional[str] = "int8". Fake quantization layout to use for activation quantization. Valid options are "int4" and "int8"
|
||||
activation_dtype: # Optional[str] = "int8". Fake quantization layout to use for activation quantization. Valid options are "int4", "int8", "float8"
|
||||
weight_dtype: # Optional[str] = "int8". Fake quantization layout to use for weight quantization. Valid options are "int4", "fp8", and "nvfp4".
|
||||
group_size: # Optional[int] = 32. The number of elements in each group for per-group fake quantization
|
||||
quantize_embedding: # Optional[bool] = False. Whether to quantize the embedding layer.
|
||||
|
||||
@@ -39,9 +39,8 @@ you used to train the model:
|
||||
# qat.yml
|
||||
qat:
|
||||
activation_dtype: int8
|
||||
weight_dtype: int8
|
||||
weight_dtype: int4
|
||||
group_size: 256
|
||||
quantize_embedding: true
|
||||
|
||||
output_dir: # The path to the output directory used during training where the final checkpoint has been saved.
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user