# NOTE(review): presumably identifies the config schema version expected by
# the tool that consumes this file — confirm against the consumer.
apiversion: modelslim_v1

spec:
  # Processing steps, given as a sequence; this file defines a single step.
  process:
    # One step of type "linear_quant" with separate activation and weight
    # settings under `qconfig`.
    - type: "linear_quant"
      qconfig:
        # Activation settings: "per_token" scope, "fp8_e4m3" dtype,
        # symmetric, "minmax" method.
        act:
          scope: "per_token"
          dtype: "fp8_e4m3"
          # Canonical lowercase boolean: same parsed value as `True`, but
          # also accepted by strict YAML 1.2 / JSON-schema loaders.
          symmetric: true
          method: "minmax"
        # Weight settings: identical to `act` except for the
        # "per_channel" scope.
        weight:
          scope: "per_channel"
          dtype: "fp8_e4m3"
          symmetric: true
          method: "minmax"
      # NOTE(review): include/exclude look like glob patterns (presumably
      # matched against layer/module names) — confirm with the consumer.
      # The patterns must stay quoted: a bare leading `*` is a YAML alias.
      include:
        - "*"
      exclude:
        - "*mlp*"

  # Output steps, given as a sequence; this file defines a single saver.
  save:
    - type: "ascendv1_saver"
      # NOTE(review): the unit is not stated in this file (presumably a
      # per-part size limit, e.g. in GB) — confirm with the saver's docs.
      part_file_size: 4