# msmodelslim/lab_practice/qwen3_next/qwen3-next-80b-a3b-w8a8.yaml
# (code preview - MindStudio-ModelSlim: model compression tool project
# based on the Ascend ecosystem - AtomGit)

# ascend-robot: [msmodelslim] update the qwen3-next yaml

# Version identifier of the configuration format used by this file.
apiversion: modelslim_v1
# Descriptive metadata for this quantization recipe.
metadata:
  config_id: qwen3_next_80b_a3b_w8a8
  score: 90
  # Model types this recipe lists as verified.
  verified_model_types:
    - Qwen3-Next-80B-A3B-Instruct
  # Summary labels. NOTE(review): w_bit / a_bit presumably mean weight and
  # activation bit width - confirm against the consumer's schema.
  label:
    w_bit: 8
    a_bit: 8
    # Canonical lowercase booleans; these parse to the same values as the
    # capitalized spelling.
    is_sparse: false
    kv_cache: false

# Shared quantization settings. The anchor lets other entries in this file
# reuse the whole mapping through the *default_w8a8_dynamic alias.
default_w8a8_dynamic: &default_w8a8_dynamic
  # NOTE(review): "act" presumably stands for activations - confirm.
  act:
    scope: 'per_token'
    dtype: 'int8'
    symmetric: true
    method: 'minmax'
  weight:
    scope: 'per_channel'
    dtype: 'int8'
    symmetric: true
    method: 'minmax'

# Processing steps followed by the save configuration.
spec:
  # NOTE(review): steps are a sequence and presumably run in listed order -
  # confirm with the consumer.
  process:
    - type: 'flex_smooth_quant'
      enable_subgraph_type:
        - 'norm-linear'
      include:
        - '*'
    # Group of linear_quant entries; each one reuses the shared
    # default_w8a8_dynamic settings with its own include/exclude patterns.
    - type: 'group'
      configs:
        # Names matching *self_attn*, leaving out *self_attn.o_proj*.
        - type: 'linear_quant'
          qconfig: *default_w8a8_dynamic
          include:
            - '*self_attn*'
          exclude:
            - '*self_attn.o_proj*'
        # Names matching *mlp.experts*.
        - type: 'linear_quant'
          qconfig: *default_w8a8_dynamic
          include:
            - '*mlp.experts*'
        # Names matching *linear_attn.in_proj_qkvz*.
        - type: 'linear_quant'
          qconfig: *default_w8a8_dynamic
          include:
            - '*linear_attn.in_proj_qkvz*'

  save:
    - type: 'ascendv1_saver'
      # NOTE(review): the unit of part_file_size is not visible here - confirm.
      part_file_size: 4