# Version tag of this configuration format (presumably checked by the loader).
apiversion: modelslim_v1

# Descriptive metadata for this quantization recipe.
# NOTE(review): the nesting below is reconstructed from the key names; confirm
# it against the consumer's schema.
metadata:
  config_id: qwen3-235B-w4a8-v1
  # NOTE(review): the scale and meaning of `score` are not defined in this file.
  score: 90
  # Model types this recipe is listed as verified for.
  verified_model_types:
    - Qwen3-235B
  # Summary labels: 4-bit weights, 8-bit activations, sparsity and KV-cache
  # flags both off.
  label:
    w_bit: 4
    a_bit: 8
    is_sparse: false
    kv_cache: false
# Reusable quantization config, referenced by alias (*default_w8a8_dynamic)
# from the linear_quant entry that targets "*self_attn*".
# Both activations and weights are int8, symmetric, calibrated with "minmax".
# The children must be indented under the anchored key; otherwise the anchor
# binds to an empty node and every alias resolves to null.
default_w8a8_dynamic: &default_w8a8_dynamic
  # Activation quantization settings.
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: true
    method: "minmax"
  # Weight quantization settings.
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "minmax"
# Reusable quantization config, referenced by alias (*default_w4a8_dynamic)
# from the linear_quant entry that targets "*mlp.experts*".
# Activations are int8 per token; weights are int4 per channel.
# The children must be indented under the anchored key; otherwise the anchor
# binds to an empty node and every alias resolves to null.
default_w4a8_dynamic: &default_w4a8_dynamic
  # Activation quantization settings.
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: true
    method: "minmax"
  # Weight quantization settings.
  # NOTE(review): "ssz" is a method name interpreted by the consumer; it is
  # not defined in this file.
  weight:
    scope: "per_channel"
    dtype: "int4"
    symmetric: true
    method: "ssz"
# Pipeline definition: processing steps followed by the save step.
# NOTE(review): the nesting below is reconstructed from the key names; confirm
# it against the consumer's schema.
spec:
  # Processing steps, listed as a sequence (presumably applied in order).
  process:
    # Smoothing step restricted to 'norm-linear' subgraphs, all modules.
    - type: "flex_smooth_quant"
      enable_subgraph_type:
        - 'norm-linear'
      include:
        - "*"
    # Group of linear quantization passes, each with its own include pattern.
    - type: "group"
      configs:
        # Self-attention modules use the int8-weight / int8-activation config.
        - type: "linear_quant"
          qconfig: *default_w8a8_dynamic
          include: ["*self_attn*"]
        # MLP expert modules use the int4-weight / int8-activation config.
        - type: "linear_quant"
          qconfig: *default_w4a8_dynamic
          include: ["*mlp.experts*"]
  # Output step.
  save:
    - type: "ascendv1_saver"
      # NOTE(review): the unit of part_file_size is not stated here — confirm.
      part_file_size: 4