@@ -1,3 +1,18 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
import numpy as np
import torch
@@ -168,8 +183,13 @@ def delta2bbox(rois,
[0.0000, 0.3161, 4.1945, 0.6839],
[5.0000, 5.0000, 5.0000, 5.0000]])
"""
- means = deltas.new_tensor(means).view(1, -1).repeat(1, deltas.size(1) // 4)
- stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1) // 4)
+ # fix shape for means and stds for onnx
+ if torch.onnx.is_in_onnx_export():
+ means = deltas.new_tensor(means).view(1, -1).repeat(1, deltas.size(1).numpy() // 4)
+ stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1).numpy() // 4)
+ else:
+ means = deltas.new_tensor(means).view(1, -1).repeat(1, deltas.size(1) // 4)
+ stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1) // 4)
denorm_deltas = deltas * stds + means
dx = denorm_deltas[:, 0::4]
dy = denorm_deltas[:, 1::4]
@@ -178,12 +198,22 @@ def delta2bbox(rois,
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = dw.clamp(min=-max_ratio, max=max_ratio)
dh = dh.clamp(min=-max_ratio, max=max_ratio)
- # Compute center of each roi
- px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
- py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
- # Compute width/height of each roi
- pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
- ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
+ # improve gather performance on NPU
+ if torch.onnx.is_in_onnx_export():
+ rois_perf = rois.permute(1, 0)
+ # Compute center of each roi
+ px = ((rois_perf[0, :] + rois_perf[2, :]) * 0.5).unsqueeze(1).expand_as(dx)
+ py = ((rois_perf[1, :] + rois_perf[3, :]) * 0.5).unsqueeze(1).expand_as(dy)
+ # Compute width/height of each roi
+ pw = (rois_perf[2, :] - rois_perf[0, :]).unsqueeze(1).expand_as(dw)
+ ph = (rois_perf[3, :] - rois_perf[1, :]).unsqueeze(1).expand_as(dh)
+ else:
+ # Compute center of each roi
+ px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
+ py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
+ # Compute width/height of each roi
+ pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
+ ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
# Use exp(network energy) to enlarge/shrink each roi
gw = pw * dw.exp()
gh = ph * dh.exp()
@@ -1,9 +1,74 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import torch
from mmcv.ops.nms import batched_nms
from mmdet.core.bbox.iou_calculators import bbox_overlaps
+class BatchNMSOp(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):
+ """
+ boxes (torch.Tensor): boxes in shape (batch, N, C, 4).
+ scores (torch.Tensor): scores in shape (batch, N, C).
+ return:
+ nmsed_boxes: (1, N, 4)
+ nmsed_scores: (1, N)
+ nmsed_classes: (1, N)
+ nmsed_num: (1,)
+ """
+
+ # Phony implementation for onnx export
+ nmsed_boxes = bboxes[:, :max_total_size, 0, :]
+ nmsed_scores = scores[:, :max_total_size, 0]
+ nmsed_classes = torch.arange(max_total_size, dtype=torch.long)
+ nmsed_num = torch.Tensor([max_total_size])
+
+ return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num
+
+ @staticmethod
+ def symbolic(g, bboxes, scores, score_thr, iou_thr, max_size_p_class, max_t_size):
+ nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = g.op('BatchMultiClassNMS',
+ bboxes, scores, score_threshold_f=score_thr, iou_threshold_f=iou_thr,
+ max_size_per_class_i=max_size_p_class, max_total_size_i=max_t_size, outputs=4)
+ return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num
+
+def batch_nms_op(bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):
+ """
+ boxes (torch.Tensor): boxes in shape (N, 4).
+ scores (torch.Tensor): scores in shape (N, ).
+ """
+
+ if bboxes.dtype == torch.float32:
+ bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4).half()
+ scores = scores.reshape(1, scores.shape[0].numpy(), -1).half()
+ else:
+ bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4)
+ scores = scores.reshape(1, scores.shape[0].numpy(), -1)
+
+ nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = BatchNMSOp.apply(bboxes, scores,
+ score_threshold, iou_threshold, max_size_per_class, max_total_size)
+ nmsed_boxes = nmsed_boxes.float()
+ nmsed_scores = nmsed_scores.float()
+ nmsed_classes = nmsed_classes.long()
+ dets = torch.cat((nmsed_boxes.reshape((max_total_size, 4)), nmsed_scores.reshape((max_total_size, 1))), -1)
+ labels = nmsed_classes.reshape((max_total_size, ))
+ return dets, labels
+
+
def multiclass_nms(multi_bboxes,
multi_scores,
score_thr,
@@ -36,13 +101,25 @@ def multiclass_nms(multi_bboxes,
if multi_bboxes.shape[1] > 4:
bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)
else:
- bboxes = multi_bboxes[:, None].expand(
- multi_scores.size(0), num_classes, 4)
+ # export expand operator to onnx more nicely
+ if torch.onnx.is_in_onnx_export:
+ bbox_shape_tensor = torch.ones(multi_scores.size(0), num_classes, 4)
+ bboxes = multi_bboxes[:, None].expand_as(bbox_shape_tensor)
+ else:
+ bboxes = multi_bboxes[:, None].expand(
+ multi_scores.size(0), num_classes, 4)
+
scores = multi_scores[:, :-1]
if score_factors is not None:
scores = scores * score_factors[:, None]
+ # npu
+ if torch.onnx.is_in_onnx_export():
+ dets, labels = batch_nms_op(bboxes, scores, score_thr, nms_cfg.get("iou_threshold"), max_num, max_num)
+ return dets, labels
+
+ # cpu and gpu
labels = torch.arange(num_classes, dtype=torch.long)
labels = labels.view(1, -1).expand_as(scores)
@@ -53,6 +130,8 @@ def multiclass_nms(multi_bboxes,
# remove low scoring boxes
valid_mask = scores > score_thr
inds = valid_mask.nonzero(as_tuple=False).squeeze(1)
+ # vals, inds = torch.topk(scores, 1000)
+
bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds]
if inds.numel() == 0:
if torch.onnx.is_in_onnx_export():
@@ -76,6 +155,7 @@ def multiclass_nms(multi_bboxes,
return dets, labels[keep]
+
def fast_nms(multi_bboxes,
multi_scores,
multi_coeffs,
@@ -1,3 +1,18 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -9,6 +24,56 @@ from .anchor_head import AnchorHead
from .rpn_test_mixin import RPNTestMixin
+class BatchNMSOp(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):
+ """
+ boxes (torch.Tensor): boxes in shape (batch, N, C, 4).
+ scores (torch.Tensor): scores in shape (batch, N, C).
+ return:
+ nmsed_boxes: (1, N, 4)
+ nmsed_scores: (1, N)
+ nmsed_classes: (1, N)
+ nmsed_num: (1,)
+ """
+
+ # Phony implementation for onnx export
+ nmsed_boxes = bboxes[:, :max_total_size, 0, :]
+ nmsed_scores = scores[:, :max_total_size, 0]
+ nmsed_classes = torch.arange(max_total_size, dtype=torch.long)
+ nmsed_num = torch.Tensor([max_total_size])
+
+ return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num
+
+ @staticmethod
+ def symbolic(g, bboxes, scores, score_thr, iou_thr, max_size_p_class, max_t_size):
+ nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = g.op('BatchMultiClassNMS',
+ bboxes, scores, score_threshold_f=score_thr, iou_threshold_f=iou_thr,
+ max_size_per_class_i=max_size_p_class, max_total_size_i=max_t_size, outputs=4)
+ return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num
+
+def batch_nms_op(bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):
+ """
+ boxes (torch.Tensor): boxes in shape (N, 4).
+ scores (torch.Tensor): scores in shape (N, ).
+ """
+
+ if bboxes.dtype == torch.float32:
+ bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4).half()
+ scores = scores.reshape(1, scores.shape[0].numpy(), -1).half()
+ else:
+ bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4)
+ scores = scores.reshape(1, scores.shape[0].numpy(), -1)
+
+ nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = BatchNMSOp.apply(bboxes, scores,
+ score_threshold, iou_threshold, max_size_per_class, max_total_size) # max_total_size num_bbox
+ nmsed_boxes = nmsed_boxes.float()
+ nmsed_scores = nmsed_scores.float()
+ nmsed_classes = nmsed_classes.long()
+ dets = torch.cat((nmsed_boxes.reshape((max_total_size, 4)), nmsed_scores.reshape((max_total_size, 1))), -1)
+ labels = nmsed_classes.reshape((max_total_size, ))
+ return dets, labels
+
@HEADS.register_module()
class RPNHead(RPNTestMixin, AnchorHead):
"""RPN head.
@@ -132,9 +197,14 @@ class RPNHead(RPNTestMixin, AnchorHead):
if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
# sort is faster than topk
# _, topk_inds = scores.topk(cfg.nms_pre)
- ranked_scores, rank_inds = scores.sort(descending=True)
- topk_inds = rank_inds[:cfg.nms_pre]
- scores = ranked_scores[:cfg.nms_pre]
+ # onnx uses topk to sort, this is simpler for onnx export
+ if torch.onnx.is_in_onnx_export():
+ scores, topk_inds = torch.topk(scores, cfg.nms_pre)
+ else:
+ ranked_scores, rank_inds = scores.sort(descending=True)
+ topk_inds = rank_inds[:cfg.nms_pre]
+ scores = ranked_scores[:cfg.nms_pre]
+
rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
anchors = anchors[topk_inds, :]
mlvl_scores.append(scores)
@@ -164,5 +234,11 @@ class RPNHead(RPNTestMixin, AnchorHead):
# TODO: remove the hard coded nms type
nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
- dets, keep = batched_nms(proposals, scores, ids, nms_cfg)
- return dets[:cfg.nms_post]
+ # npu return
+ if torch.onnx.is_in_onnx_export():
+ dets, labels = batch_nms_op(proposals, scores, 0.0, nms_cfg.get("iou_threshold"), cfg.nms_post, cfg.nms_post)
+ return dets
+ # cpu and gpu return
+ else:
+ dets, keep = batched_nms(proposals, scores, ids, nms_cfg)
+ return dets[:cfg.nms_post]
@@ -1,9 +1,48 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
import torch
from mmcv.runner import force_fp32
from mmdet.models.builder import ROI_EXTRACTORS
from .base_roi_extractor import BaseRoIExtractor
+import torch.onnx.symbolic_helper as sym_help
+
+class RoiExtractor(torch.autograd.Function):
+ @staticmethod
+ def forward(self, f0, f1, f2, f3, rois, aligned=1, finest_scale=56, pooled_height=7, pooled_width=7,
+ pool_mode='avg', roi_scale_factor=0, sample_num=0, spatial_scale=[0.25, 0.125, 0.0625, 0.03125]):
+ """
+ feats (torch.Tensor): feats in shape (batch, 256, H, W).
+ rois (torch.Tensor): rois in shape (k, 5).
+ return:
+ roi_feats (torch.Tensor): (k, 256, pooled_width, pooled_width)
+ """
+
+ # phony implementation for shape inference
+ k = rois.size()[0]
+ roi_feats = torch.ones(k, 256, pooled_height, pooled_width)
+ return roi_feats
+
+ @staticmethod
+ def symbolic(g, f0, f1, f2, f3, rois):
+ # TODO: support tensor list type for feats
+ roi_feats = g.op('RoiExtractor', f0, f1, f2, f3, rois, aligned_i=1, finest_scale_i=56, pooled_height_i=7, pooled_width_i=7,
+ pool_mode_s='avg', roi_scale_factor_i=0, sample_num_i=0, spatial_scale_f=[0.25, 0.125, 0.0625, 0.03125], outputs=1)
+ return roi_feats
@ROI_EXTRACTORS.register_module()
class SingleRoIExtractor(BaseRoIExtractor):
@@ -52,6 +91,12 @@ class SingleRoIExtractor(BaseRoIExtractor):
@force_fp32(apply_to=('feats', ), out_fp16=True)
def forward(self, feats, rois, roi_scale_factor=None):
+ # Work around to export onnx for npu
+ if torch.onnx.is_in_onnx_export():
+ roi_feats = RoiExtractor.apply(feats[0], feats[1], feats[2], feats[3], rois)
+ # roi_feats = RoiExtractor.apply(list(feats), rois)
+ return roi_feats
+
"""Forward function."""
out_size = self.roi_layers[0].output_size
num_levels = len(feats)
@@ -1,3 +1,17 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import argparse
import os.path as osp
@@ -20,7 +34,8 @@ def pytorch2onnx(config_path,
verify=False,
normalize_cfg=None,
dataset='coco',
- test_img=None):
+ test_img=None,
+ enable_onnx_checker=False):
input_config = {
'input_shape': input_shape,
@@ -49,7 +64,8 @@ def pytorch2onnx(config_path,
keep_initializers_as_inputs=True,
do_constant_folding=True,
verbose=show,
- opset_version=opset_version)
+ opset_version=opset_version,
+ enable_onnx_checker=False)
model.forward = orig_model.forward
print(f'Successfully exported ONNX model: {output_file}')