msit/msmodelslim/test/smoke/configs/w8a8_per_channel_mix.yaml-代码预览-msit:基于昇腾平台的推理工具链项目 - AtomGit

ascend-robot【msmodelslim】【feature】【v1】 W4A4量化精度调优

990fdd11创建于 2025年9月29日历史提交

# Version identifier of this configuration format
# (presumably checked by the config loader; confirm against the consumer).
apiversion: "modelslim_v1"

# Reusable quantization settings, exposed through the anchor
# `&default_w8a8_dynamic` and referenced by alias further down in `spec`.
# Activations: int8, "per_token" scope, symmetric, "minmax" method.
# Weights:     int8, "per_channel" scope, symmetric, "minmax" method.
# Booleans are written in canonical lowercase so every YAML schema
# (1.1, 1.2 core, 1.2 JSON) resolves them to a real boolean.
default_w8a8_dynamic: &default_w8a8_dynamic
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: true
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "minmax"

# Reusable quantization settings, exposed through the anchor
# `&default_w8a8` and referenced by alias further down in `spec`.
# Activations: int8, "per_tensor" scope, asymmetric, "minmax" method.
# Weights:     int8, "per_channel" scope, symmetric, "minmax" method.
# Booleans are written in canonical lowercase so every YAML schema
# (1.1, 1.2 core, 1.2 JSON) resolves them to a real boolean.
default_w8a8: &default_w8a8
  act:
    scope: "per_tensor"
    dtype: "int8"
    symmetric: false
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: true
    method: "minmax"

# Pipeline definition: processing steps, output saver, and dataset file.
spec:
  process:
    # One "group" step holding two "linear_quant" entries, each with its
    # own qconfig and name patterns.
    # NOTE(review): include/exclude look like glob patterns matched against
    # module names; confirm against the consumer.
    - type: "group"
      configs:
        # Names matching "*self_attn*" use the `default_w8a8` settings
        # (per_tensor activations).
        - type: "linear_quant"
          qconfig: *default_w8a8
          include:
            - "*self_attn*"
        # Names matching "*mlp*" use the `default_w8a8_dynamic` settings
        # (per_token activations); names matching "*gate" are excluded.
        - type: "linear_quant"
          qconfig: *default_w8a8_dynamic
          include:
            - "*mlp*"
          exclude:
            - "*gate"

  save:
    # NOTE(review): unit of part_file_size is not visible here; confirm.
    - type: "ascendv1_saver"
      part_file_size: 4

  # Dataset file name, quoted like the other string values in this file.
  dataset: "test.json"