apiversion: modelslim_v1

default_w8a8_dynamic: &default_w8a8_dynamic
  act:
    scope: "per_token"
    dtype: "int8"
    symmetric: True
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: True
    method: "minmax"

default_w8a8: &default_w8a8
  act:
    scope: "per_tensor"
    dtype: "int8"
    symmetric: False
    method: "minmax"
  weight:
    scope: "per_channel"
    dtype: "int8"
    symmetric: True
    method: "minmax"

spec:
  process:
    - type: "group"
      configs:
        - type: "linear_quant"
          qconfig: *default_w8a8
          include:
            - "*self_attn*"

        - type: "linear_quant"
          qconfig: *default_w8a8_dynamic
          include:
            - "*mlp*"
          exclude:
            - "*gate"

  save:
    - type: "ascendv1_saver"
      part_file_size: 4

  dataset: test.json