# Identifier of the config schema this file is written against
# (presumably used by the consuming tool to pick a parser — confirm).
apiversion: "modelslim_v1"
# Reusable W8A8 quantization settings: int8 weights quantized per channel and
# int8 activations quantized per token, both symmetric with bfloat16 scales.
# The anchor lets the `strategies` list reference this mapping through the
# alias *default_w8a8_dynamic.
# NOTE(review): nesting was reconstructed from a flattened file — `weight` and
# `act` are assumed to be children of this key, and each `ext` a child of its
# own section. Confirm against the consumer's schema.
default_w8a8_dynamic: &default_w8a8_dynamic
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "autoround"
    ext:
      scale_dtype: "bfloat16"
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: true
    method: "minmax"
    ext:
      scale_dtype: "bfloat16"
# Reusable W4A4 quantization settings: int4 weights quantized per group
# (group_size 32) and int4 activations quantized per token, both symmetric
# with bfloat16 scales. The anchor lets the `strategies` list reference this
# mapping through the alias *default_w4a4_dynamic.
# NOTE(review): nesting was reconstructed from a flattened file — `group_size`
# is assumed to belong to `weight.ext` because it directly follows that `ext`
# key and the weight scope is "per_group". Confirm against the consumer's
# schema.
default_w4a4_dynamic: &default_w4a4_dynamic
  weight:
    scope: "per_group"
    dtype: "int4"
    symmetric: true
    method: "autoround"
    ext:
      group_size: 32
      scale_dtype: "bfloat16"
  act:
    scope: "per_token"
    dtype: "int4"
    symmetric: true
    method: "minmax"
    ext:
      scale_dtype: "bfloat16"
# Pipeline definition: processing steps, output saver(s) and the dataset file.
# NOTE(review): nesting was reconstructed from a flattened file — `process`,
# `save` and `dataset` are assumed to be siblings under `spec`. Confirm
# against the consumer's schema.
spec:
  # Processing steps; presumably applied in the listed order — confirm.
  process:
    # Smoothing step limited to the "ov" and "up-down" subgraph types.
    - type: "iter_smooth"
      alpha: 0.9
      # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML)
      # resolve it as a float rather than the string "1e-5".
      scale_min: 1.0e-5
      symmetric: false
      enable_subgraph_type: ["ov", "up-down"]
    - type: "quarot"
      online: true
      block_size: 32
      max_tp_size: 2
      down_proj_online_layers: [1]
    # Same smoothing parameters as the first step, limited to "norm-linear".
    - type: "iter_smooth"
      alpha: 0.9
      # Decimal point kept for the same float-resolution reason as above.
      scale_min: 1.0e-5
      symmetric: false
      enable_subgraph_type: ["norm-linear"]
    - type: "autoround_quant"
      iters: 1
      enable_minmax_tuning: true
      enable_round_tuning: true
      strategies:
        # W8A8 settings with no `include` list; the listed patterns are
        # excluded from this strategy.
        - qconfig: *default_w8a8_dynamic
          exclude:
            - "*.up_proj"
            - "*.gate_proj"
            - "*.o_proj"
            - "model.layers.1.mlp.down_proj"
        # W4A4 settings for exactly the projections the W8A8 strategy excludes.
        # NOTE(review): "model.layers.1.mlp.down_proj" is excluded by both
        # strategies and matched by neither include list — presumably it is
        # intentionally left unquantized; confirm.
        - qconfig: *default_w4a4_dynamic
          include:
            - "*.up_proj"
            - "*.gate_proj"
            - "*.o_proj"
          exclude:
            - "model.layers.1.mlp.down_proj"
  save:
    - type: "ascendv1_saver"
      part_file_size: 4
  dataset: "test.json"