1. 跑通未接入 AIPP 的推理全流程

具体请参考 ResNet50 推理指导。

2. 理清预处理操作

ResNet50 的预处理操作包括：

读取图片。
把 BGR 格式的图片转换为 RGB 格式的图片。
把图片等比缩放至高度和宽度中较小者等于 256。
裁剪出图片正中间高度和宽度都等于 224 的部分。
归一化，均值为 [123.675, 116.28, 103.53]，标准差的倒数（代码及 AIPP 配置中记作 var_reci）为 [0.0171247538316637, 0.0175070028011204, 0.0174291938997821]。
保存为 .bin 文件。

预处理代码详情

ResNet50 的预处理脚本 preprocess_without_aipp.py 内容如下：

import argparse
import os
import pathlib
from typing import List

import cv2
import numpy as np
from PIL import Image
import tqdm


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the preprocessing script.

    Returns:
        Namespace carrying ``input_dir`` and ``output_dir``; both options
        are required.
    """
    cli = argparse.ArgumentParser(description='ResNet50 的预处理脚本')
    # Each entry is (flag, help text); every option is mandatory.
    required_options = (
        ('--input_dir', '待处理图片的读取目录'),
        ('--output_dir', '处理后图片的保存目录'),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args()


def preprocess(input_dir: str, output_dir: str) -> None:
    """Preprocess every entry of ``input_dir`` and store results in ``output_dir``.

    Args:
        input_dir: Directory whose entries are handed, one by one, to
            ``preprocess_single_file``.
        output_dir: Directory for the results; it is created (including
            missing parents) when it does not exist yet.
    """
    destination = pathlib.Path(output_dir)
    destination.mkdir(parents=True, exist_ok=True)
    # The listing is wrapped in tqdm.tqdm to report progress while iterating.
    for entry_name in tqdm.tqdm(os.listdir(input_dir)):
        preprocess_single_file(entry_name, input_dir, output_dir)


def preprocess_single_file(filename: str, input_dir: str, output_dir: str) -> None:
    """Preprocess one image and save it as a raw float32 ``.bin`` file.

    The image is read with OpenCV, converted from BGR to RGB, resized via
    ``resize(image, 256)``, center-cropped to 224 x 224, normalized per
    channel, transposed from HWC to CHW and dumped with ``ndarray.tofile``.

    Args:
        filename: Name of the image file inside ``input_dir``.
        input_dir: Directory the image is read from.
        output_dir: Directory the ``.bin`` file is written to; the output
            keeps the input file name with its suffix replaced by ``.bin``.

    Raises:
        ValueError: If OpenCV cannot read ``filename`` as an image.
    """
    file_path = str(pathlib.Path(input_dir, filename))
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread signals an unreadable or non-image file by returning
        # None instead of raising; fail here with a clear message rather
        # than with an opaque assertion inside cv2.cvtColor.
        raise ValueError(f'Failed to read image: {file_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = resize(image, 256)
    image = center_crop(image, 224)
    image = image.astype(np.float32)
    image = normalize(
        image,
        [123.675, 116.28, 103.53],
        [0.0171247538316637, 0.0175070028011204, 0.0174291938997821]
    )
    # HWC -> CHW
    image = image.transpose(2, 0, 1)
    # ais_bench cannot process .npy files, so the data is saved as a .bin file.
    image.tofile(pathlib.Path(output_dir, filename).with_suffix('.bin'))

def resize(image: np.ndarray, target_size: int) -> np.ndarray:
    """Scale an image so that its shorter side equals ``target_size``.

    The longer side is scaled by the same ratio and truncated to an integer.

    Args:
        image: Array whose shape unpacks to (height, width, channels).
        target_size: Length of the shorter side after scaling, in pixels.

    Returns:
        The result of ``cv2.resize`` for the computed size.
    """
    height, width, _channels = image.shape
    # cv2.resize is given the size as (width, height).
    if height < width:
        new_size = (int(target_size * width / height), target_size)
    else:
        new_size = (target_size, int(target_size * height / width))
    return cv2.resize(image, new_size)


def center_crop(image: np.ndarray, target_size: int) -> np.ndarray:
    """Crop the central ``target_size`` x ``target_size`` region of an image.

    Args:
        image: Array whose shape unpacks to (height, width, channels).
        target_size: Side length of the square crop, in pixels.

    Returns:
        The slice of ``image`` covering the central square, with shape
        (target_size, target_size, channels).

    Raises:
        ValueError: If the image height or width is smaller than
            ``target_size``.
    """
    origin_h, origin_w, _ = image.shape
    if origin_h < target_size or origin_w < target_size:
        # A negative start offset would be interpreted as an index counted
        # from the end and silently yield a wrong (or too small) region.
        raise ValueError(
            f'Image of size {origin_h}x{origin_w} (HxW) is smaller than '
            f'the crop size {target_size}'
        )
    h_start = round((origin_h - target_size) / 2)
    w_start = round((origin_w - target_size) / 2)
    return image[h_start:h_start + target_size, w_start:w_start + target_size]


def normalize(image: np.ndarray, means: List[float], var_recis: List[float]) -> np.ndarray:
    """Normalize every channel in place as ``(pixel - mean) * var_reci``.

    Args:
        image: Array indexed as ``image[:, :, channel]``; it is modified
            in place.
        means: Per-channel value subtracted from the pixels.
        var_recis: Per-channel factor applied after the subtraction.

    Returns:
        The same ``image`` object that was passed in.
    """
    for channel in range(image.shape[-1]):
        shifted = image[:, :, channel] - means[channel]
        image[:, :, channel] = shifted * var_recis[channel]
    return image


if __name__ == '__main__':
    # Script entry point: read the CLI options, then process the whole directory.
    cli_args = parse_args()
    preprocess(input_dir=cli_args.input_dir, output_dir=cli_args.output_dir)

脚本使用示例：

python preprocess_without_aipp.py \
  --input_dir="${image_dir}" \
  --output_dir=preprocess_result_without_aipp

3. 写 AIPP 配置文件

ResNet50 的 AIPP 配置文件 resnet50.aippconfig 内容如下：

aipp_op {
    aipp_mode: static # 静态 AIPP
    input_format: RGB888_U8 # 图片输入格式，需要把图片转为 RGB 格式

    # 原始图片的宽度、高度
    src_image_size_w: 224
    src_image_size_h: 224    

    # 三个通道的归一化参数
    # pixel_out_chx(i) = [pixel_in_chx(i) - mean_chn(i) - min_chn(i)] * var_reci_chn(i)
    # 因为 mean_chn_i 只支持 uint8，所以用 min_chn_i 设置 mean 的值
    min_chn_0: 123.675
    min_chn_1: 116.28
    min_chn_2: 103.53
    var_reci_chn_0: 0.0171247538316637
    var_reci_chn_1: 0.0175070028011204
    var_reci_chn_2: 0.0174291938997821
}

4. 修改预处理代码

改动包含以下内容：

删除归一化操作。
删除把 HWC 转为 CHW 的操作。
原本输出 float32 类型的 .bin 文件，现改为输出 uint8 类型的 .bin 文件。

修改后的预处理代码详情

import argparse
import os
import pathlib
from typing import List

import cv2
import numpy as np
from PIL import Image
import tqdm


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the preprocessing script.

    Returns:
        Namespace carrying ``input_dir`` and ``output_dir``; both options
        are required.
    """
    cli = argparse.ArgumentParser(description='ResNet50 的预处理脚本')
    # Each entry is (flag, help text); every option is mandatory.
    required_options = (
        ('--input_dir', '待处理图片的读取目录'),
        ('--output_dir', '处理后图片的保存目录'),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args()


def preprocess(input_dir: str, output_dir: str) -> None:
    """Preprocess every entry of ``input_dir`` and store results in ``output_dir``.

    Args:
        input_dir: Directory whose entries are handed, one by one, to
            ``preprocess_single_file``.
        output_dir: Directory for the results; it is created (including
            missing parents) when it does not exist yet.
    """
    destination = pathlib.Path(output_dir)
    destination.mkdir(parents=True, exist_ok=True)
    # The listing is wrapped in tqdm.tqdm to report progress while iterating.
    for entry_name in tqdm.tqdm(os.listdir(input_dir)):
        preprocess_single_file(entry_name, input_dir, output_dir)


def preprocess_single_file(filename: str, input_dir: str, output_dir: str) -> None:
    """Preprocess one image and save it as a raw uint8 ``.bin`` file.

    The image is read with OpenCV, converted from BGR to RGB, resized via
    ``resize(image, 256)``, center-cropped to 224 x 224, cast to uint8 and
    dumped with ``ndarray.tofile``. No normalization or layout transpose is
    performed here.

    Args:
        filename: Name of the image file inside ``input_dir``.
        input_dir: Directory the image is read from.
        output_dir: Directory the ``.bin`` file is written to; the output
            keeps the input file name with its suffix replaced by ``.bin``.

    Raises:
        ValueError: If OpenCV cannot read ``filename`` as an image.
    """
    file_path = str(pathlib.Path(input_dir, filename))
    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread signals an unreadable or non-image file by returning
        # None instead of raising; fail here with a clear message rather
        # than with an opaque assertion inside cv2.cvtColor.
        raise ValueError(f'Failed to read image: {file_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = resize(image, 256)
    image = center_crop(image, 224)
    image = image.astype(np.uint8)
    # ais_bench cannot process .npy files, so the data is saved as a .bin file.
    image.tofile(pathlib.Path(output_dir, filename).with_suffix('.bin'))

def resize(image: np.ndarray, target_size: int) -> np.ndarray:
    """Scale an image so that its shorter side equals ``target_size``.

    The longer side is scaled by the same ratio and truncated to an integer.

    Args:
        image: Array whose shape unpacks to (height, width, channels).
        target_size: Length of the shorter side after scaling, in pixels.

    Returns:
        The result of ``cv2.resize`` for the computed size.
    """
    height, width, _channels = image.shape
    # cv2.resize is given the size as (width, height).
    if height < width:
        new_size = (int(target_size * width / height), target_size)
    else:
        new_size = (target_size, int(target_size * height / width))
    return cv2.resize(image, new_size)


def center_crop(image: np.ndarray, target_size: int) -> np.ndarray:
    """Crop the central ``target_size`` x ``target_size`` region of an image.

    Args:
        image: Array whose shape unpacks to (height, width, channels).
        target_size: Side length of the square crop, in pixels.

    Returns:
        The slice of ``image`` covering the central square, with shape
        (target_size, target_size, channels).

    Raises:
        ValueError: If the image height or width is smaller than
            ``target_size``.
    """
    origin_h, origin_w, _ = image.shape
    if origin_h < target_size or origin_w < target_size:
        # A negative start offset would be interpreted as an index counted
        # from the end and silently yield a wrong (or too small) region.
        raise ValueError(
            f'Image of size {origin_h}x{origin_w} (HxW) is smaller than '
            f'the crop size {target_size}'
        )
    h_start = round((origin_h - target_size) / 2)
    w_start = round((origin_w - target_size) / 2)
    return image[h_start:h_start + target_size, w_start:w_start + target_size]


if __name__ == '__main__':
    # Script entry point: read the CLI options, then process the whole directory.
    cli_args = parse_args()
    preprocess(input_dir=cli_args.input_dir, output_dir=cli_args.output_dir)

5. ONNX 转 OM 时使用 AIPP 配置

使用 AIPP 配置，atc 命令需要增加 insert_op_conf 和 enable_small_channel 参数。

atc \
  --framework=5 \
  --model=resnet50_official.onnx \
  --output=resnet50_bs64_with_aipp \
  --input_format='NCHW' \
  --input_shape='actual_input_1:64,3,224,224' \
  --insert_op_conf=resnet50.aippconfig \
  --enable_small_channel=1 \
  --soc_version="Ascend${chip_name}"

参数说明：

insert_op_conf：插入算子（例如 AIPP 预处理算子）的配置文件路径与文件名。使用该参数后，模型的输入数据类型为 UINT8。
enable_small_channel：是否使能small channel的优化，使能后在channel<=4的卷积层会有性能收益。建议用户在推理场景下打开此开关。

更多参数说明请参考 atc 参数概览（CANN 社区版文档 > 应用开发 > ATC 模型转换 > 参数说明 > 参数概览）。

6. 对比接入 AIPP 前后的精度和性能

模型	数据集	芯片	Batch Size	精度	性能
接入 AIPP 前的 ResNet50	ImageNet 验证集 50000 张图片	300I Pro	64	Top1 75.37% Top2 85.49% Top3 89.24% Top4 91.27% Top5 92.52%	2630 fps
接入 AIPP 后的 ResNet50	ImageNet 验证集 50000 张图片	300I Pro	64	Top1 75.37% Top2 85.48% Top3 89.23% Top4 91.27% Top5 92.51%	2929 fps