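# msmodelslim/lab_practice/glm4_6v/glm4_6v_w8a8.yaml - code preview -
# MindStudio-ModelSlim: a model compression tool for the Ascend ecosystem - AtomGit

# ascend-robot: GLM-4.6V W8A8 quantization adaptation [part 1] -
# add the GLM-4.6V model adapter and recommended practice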

# Version identifier of the config format used by this recipe.
apiversion: multimodal_vlm_modelslim_v1
# Identity and descriptive labels of this recipe.
metadata:
  config_id: glm4_6v_w8a8
  # NOTE(review): the meaning/scale of `score` is not visible in this file;
  # confirm with the consumer of this config.
  score: 90
  # Model types this recipe lists as verified.
  verified_model_types:
    - GLM-4.6V
  # Labels: 8-bit weights, 8-bit activations; is_sparse and kv_cache are off.
  label:
    w_bit: 8
    a_bit: 8
    is_sparse: false
    kv_cache: false

# Reusable qconfig, referenced elsewhere through the alias *default_w8a8_dynamic.
#   act:    int8, per-token scope, symmetric, min-max.
#   weight: int8, per-channel scope, symmetric, min-max.
default_w8a8_dynamic: &default_w8a8_dynamic
  act:
    scope: 'per_token'
    dtype: 'int8'
    symmetric: true
    method: 'minmax'
  weight:
    scope: 'per_channel'
    dtype: 'int8'
    symmetric: true
    method: 'minmax'

# Reusable qconfig, referenced elsewhere through the alias *default_w8a8.
#   act:    int8, per-tensor scope, asymmetric (symmetric: false), min-max.
#   weight: int8, per-channel scope, symmetric, min-max.
default_w8a8: &default_w8a8
  act:
    scope: 'per_tensor'
    dtype: 'int8'
    symmetric: false
    method: 'minmax'
  weight:
    scope: 'per_channel'
    dtype: 'int8'
    symmetric: true
    method: 'minmax'

spec:
  process:
    # First entry of the process list: the "iter_smooth" step.
    - type: 'iter_smooth'
      # Float, > 0, default 0.9. Balance parameter that controls the relative
      # importance of activations versus weights.
      alpha: 0.9
      # Float, > 0, default 1e-5. Lower bound of the scaling factor; keeps
      # overly small values from causing numerical instability.
      # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve
      # it as a float; a bare `1e-5` is loaded as the string "1e-5" there.
      scale_min: 1.0e-5
      # When is_shift=True is used, symmetric should be set to False.
      symmetric: true
      enable_subgraph_type:
        - 'norm-linear'
        - 'linear-linear'
        - 'ov'
        - 'up-down'
      # '*' matches every name, so nothing is filtered out for this step.
      include:
        - '*'
    # "linear_quant" step using the default_w8a8 qconfig: applies to every name
    # matched by '*' except those matching one of the `exclude` patterns.
    - type: 'linear_quant'
      qconfig: *default_w8a8
      include:
        - '*'
      exclude:
        - '*experts*'
        - '*merger*'
        - '*mlp.gate'
        - '*visual.blocks.*.mlp.down_proj'
        - '*language_model.layers.0.self_attn*'
    # "linear_quant" step using the default_w8a8_dynamic qconfig: applies to
    # names matching '*experts*', minus those matching an `exclude` pattern.
    - type: 'linear_quant'
      qconfig: *default_w8a8_dynamic
      include:
        - '*experts*'
      exclude:
        - '*merger*'
        - '*mlp.gate'
        - '*visual.blocks.*.mlp.down_proj'
        - '*language_model.layers.0.self_attn*'
  # Output configuration: a single saver entry of type "ascendv1_saver".
  save:
    - type: 'ascendv1_saver'
      # NOTE(review): the unit of part_file_size is not visible in this file
      # (presumably GB per shard); confirm with the saver implementation.
      part_file_size: 4
  # Short name: auto-searches in lab_calib/
  dataset: 'calibImages'
  # Text paired with the calibration data.
  # NOTE(review): presumably the default prompt per image; confirm.
  default_text: 'Describe this image in detail.'