---
# Version tag of the configuration format used by this file.
apiversion: "modelslim_v1"
# Reusable quantization settings, referenced elsewhere in this file via the
# alias *default_w8a8_dynamic.
#   act:    int8, symmetric, "per_token" scope, "minmax" method
#   weight: int8, symmetric, "per_channel" scope, "minmax" method
# The nested keys must be indented under the anchored key; otherwise the
# anchor binds to an empty node and the keys leak to the top level.
default_w8a8_dynamic: &default_w8a8_dynamic
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: true
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "minmax"
# Reusable quantization settings, referenced elsewhere in this file via the
# alias *default_w8a8.
#   act:    int8, asymmetric (symmetric: false), "per_tensor" scope, "minmax"
#   weight: int8, symmetric, "per_channel" scope, "minmax" method
# The nested keys must be indented under the anchored key; otherwise the
# anchor binds to an empty node and the keys leak to the top level.
default_w8a8: &default_w8a8
  act:
    scope: "per_tensor"
    dtype: "int8"
    symmetric: false
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "minmax"
# Pipeline definition: the processing steps, the saver, and the dataset.
spec:
  process:
    # A single "group" step bundling two "linear_quant" configs.
    - type: "group"
      configs:
        # Applies the *default_w8a8 settings to entries matching
        # "*self_attn*" (presumably glob patterns over module names;
        # verify against the consuming tool).
        - type: "linear_quant"
          qconfig: *default_w8a8
          include:
            - "*self_attn*"
        # Applies the *default_w8a8_dynamic settings to entries matching
        # "*mlp*", except those matching "*gate".
        - type: "linear_quant"
          qconfig: *default_w8a8_dynamic
          include:
            - "*mlp*"
          exclude:
            - "*gate"
  # NOTE(review): `save` and `dataset` are nested under `spec` here; the
  # nesting level could not be read from the flattened source. Confirm
  # against the modelslim_v1 schema.
  save:
    - type: "ascendv1_saver"
      # Unit of this size limit is not stated in this file. TODO confirm.
      part_file_size: 4
  dataset: test.json