--- Swin-Transformer-Object-Detection/configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py	2022-08-03 17:10:43.910705201 -0900

+++ swint/configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py	2022-08-03 17:01:55.022687107 -0900

@@ -33,7 +33,7 @@

                     target_stds=[0.1, 0.1, 0.2, 0.2]),

                 reg_class_agnostic=False,

                 reg_decoded_bbox=True,

-                norm_cfg=dict(type='SyncBN', requires_grad=True),

+                norm_cfg=dict(type='BN', requires_grad=True),

                 loss_cls=dict(

                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),

@@ -52,7 +52,7 @@

                     target_stds=[0.05, 0.05, 0.1, 0.1]),

                 reg_class_agnostic=False,

                 reg_decoded_bbox=True,

-                norm_cfg=dict(type='SyncBN', requires_grad=True),

+                norm_cfg=dict(type='BN', requires_grad=True),

                 loss_cls=dict(

                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),

@@ -71,7 +71,7 @@

                     target_stds=[0.033, 0.033, 0.067, 0.067]),

                 reg_class_agnostic=False,

                 reg_decoded_bbox=True,

-                norm_cfg=dict(type='SyncBN', requires_grad=True),

+                norm_cfg=dict(type='BN', requires_grad=True),

                 loss_cls=dict(

                     type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),

                 loss_bbox=dict(type='GIoULoss', loss_weight=10.0))



--- Swin-Transformer-Object-Detection/mmdet/core/post_processing/bbox_nms.py	2022-08-03 17:10:43.950705203 -0900

+++ swint/mmdet/core/post_processing/bbox_nms.py	2022-08-03 17:03:47.662690961 -0900

@@ -4,6 +4,58 @@

 from mmdet.core.bbox.iou_calculators import bbox_overlaps





+class BatchNMSOp(torch.autograd.Function):

+    @staticmethod

+    def forward(ctx, bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):

+        """

+        boxes (torch.Tensor): boxes in shape (batch, N, C, 4).

+        scores (torch.Tensor): scores in shape (batch, N, C).

+        return:

+            nmsed_boxes: (1, N, 4)

+            nmsed_scores: (1, N)

+            nmsed_classes: (1, N)

+            nmsed_num: (1,)

+        """

+

+        # Phony implementation for onnx export

+        nmsed_boxes = bboxes[:, :max_total_size, 0, :]

+        nmsed_scores = scores[:, :max_total_size, 0]

+        nmsed_classes = torch.zeros(max_total_size, dtype=torch.long)

+        nmsed_num = torch.Tensor([max_total_size])

+

+        return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num

+

+    @staticmethod

+    def symbolic(g, bboxes, scores, score_thr, iou_thr, max_size_p_class, max_t_size):

+        nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = g.op('BatchMultiClassNMS',

+            bboxes, scores, score_threshold_f=score_thr, iou_threshold_f=iou_thr,

+            max_size_per_class_i=max_size_p_class, max_total_size_i=max_t_size, outputs=4)

+        return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num

+

+def batch_nms_op(bboxes, scores, score_threshold, iou_threshold, max_size_per_class, max_total_size):

+    """

+    boxes (torch.Tensor): boxes in shape (N, 4).

+    scores (torch.Tensor): scores in shape (N, ).

+    """

+

+    num_classes = bboxes.shape[1].numpy() // 4

+    if bboxes.dtype == torch.float32:

+        bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4).half()

+        scores = scores.reshape(1, scores.shape[0].numpy(), -1).half()

+    else:

+        bboxes = bboxes.reshape(1, bboxes.shape[0].numpy(), -1, 4)

+        scores = scores.reshape(1, scores.shape[0].numpy(), -1)

+

+    nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = BatchNMSOp.apply(bboxes, scores,

+        score_threshold, iou_threshold, max_size_per_class, max_total_size)

+    nmsed_boxes = nmsed_boxes.float()

+    nmsed_scores = nmsed_scores.float()

+    nmsed_classes = nmsed_classes.long()

+    dets = torch.cat((nmsed_boxes.reshape((max_total_size, 4)), nmsed_scores.reshape((max_total_size, 1))), -1)

+    dets = dets.reshape((max_total_size, 5))

+    labels = nmsed_classes.reshape((max_total_size, ))

+    return dets, labels

+

 def multiclass_nms(multi_bboxes,

                    multi_scores,

                    score_thr,

@@ -40,7 +92,10 @@

             multi_scores.size(0), num_classes, 4)



     scores = multi_scores[:, :-1]

+    # for npu

+    dets, labels = batch_nms_op(bboxes, scores, score_thr, nms_cfg.get("iou_threshold"), max_num, max_num)



+    return dets, labels

     labels = torch.arange(num_classes, dtype=torch.long)

     labels = labels.view(1, -1).expand_as(scores)



--- Swin-Transformer-Object-Detection/mmdet/core/post_processing/merge_augs.py	2022-08-03 17:10:43.950705203 -0900

+++ swint/mmdet/core/post_processing/merge_augs.py	2022-08-03 17:03:47.062690940 -0900

@@ -131,8 +131,9 @@

     recovered_masks = []

     for mask, img_info in zip(aug_masks, img_metas):

         flip = img_info[0]['flip']

-        flip_direction = img_info[0]['flip_direction']

+

         if flip:

+            flip_direction = img_info[0]['flip_direction']

             if flip_direction == 'horizontal':

                 mask = mask[:, :, :, ::-1]

             elif flip_direction == 'vertical':

@@ -142,8 +143,11 @@

                     f"Invalid flipping direction '{flip_direction}'")

         recovered_masks.append(mask)



+

+

     if weights is None:

-        merged_masks = np.mean(recovered_masks, axis=0)

+        # merged_masks = np.mean(recovered_masks, axis=0)

+        merged_masks = sum(recovered_masks) / len(recovered_masks)

     else:

         merged_masks = np.average(

             np.array(recovered_masks), axis=0, weights=np.array(weights))













--- Swin-Transformer-Object-Detection/mmdet/models/dense_heads/rpn_head.py

+++ swint/mmdet/models/dense_heads/rpn_head.py

@@ -11,7 +11,7 @@ from mmcv.ops import batched_nms

 from ..builder import HEADS

 from .anchor_head import AnchorHead

 from .rpn_test_mixin import RPNTestMixin

-

+from mmdet.core.post_processing.bbox_nms import  (BatchNMSOp,batch_nms_op)



 @HEADS.register_module()

 class RPNHead(RPNTestMixin, AnchorHead):

@@ -153,6 +153,9 @@ class RPNHead(RPNTestMixin, AnchorHead):

                     nms_pre = torch.where(scores_shape[1] < nms_pre_tensor,

                                           scores_shape[1], nms_pre_tensor)

                     _, topk_inds = scores.topk(nms_pre)

+                    # for npu

+                    topk_inds = topk_inds.long()

+

                     batch_inds = torch.arange(batch_size).view(

                         -1, 1).expand_as(topk_inds)

                     scores = scores[batch_inds, topk_inds]

@@ -230,7 +233,9 @@ class RPNHead(RPNTestMixin, AnchorHead):

                     mlvl_scores = mlvl_scores[valid_ind]

                     mlvl_ids = mlvl_ids[valid_ind]



-            dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids,

-                                     cfg.nms)

+            # dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids,

+            #                          cfg.nms)

+            # for npu

+            dets, _ = batch_nms_op(mlvl_proposals, mlvl_scores, 0.0, cfg.nms.get("iou_threshold"),cfg.max_per_img,cfg.max_per_img)

             result_list.append(dets[:cfg.max_per_img])

         return result_list











--- Swin-Transformer-Object-Detection/mmdet/models/roi_heads/cascade_roi_head.py	2022-08-03 17:10:43.966705203 -0900

+++ swint/mmdet/models/roi_heads/cascade_roi_head.py	2022-08-03 18:00:48.106807982 -0900

@@ -8,7 +8,6 @@

 from .base_roi_head import BaseRoIHead

 from .test_mixins import BBoxTestMixin, MaskTestMixin



-

 @HEADS.register_module()

 class CascadeRoIHead(BaseRoIHead, BBoxTestMixin, MaskTestMixin):

     """Cascade roi head including one bbox head and one mask head.

@@ -288,6 +287,7 @@

         return losses



     def simple_test(self, x, proposal_list, img_metas, rescale=False):

+

         """Test without augmentation."""

         assert self.with_bbox, 'Bbox head must be implemented.'

         num_imgs = len(proposal_list)

@@ -349,8 +349,8 @@

             det_bboxes.append(det_bbox)

             det_labels.append(det_label)



-        if torch.onnx.is_in_onnx_export():

-            return det_bboxes, det_labels

+        # if torch.onnx.is_in_onnx_export():

+        #     return det_bboxes, det_labels

         bbox_results = [

             bbox2result(det_bboxes[i], det_labels[i],

                         self.bbox_head[-1].num_classes)

@@ -383,9 +383,10 @@

                     mask_pred = mask_results['mask_pred']

                     # split batch mask prediction back to each image

                     mask_pred = mask_pred.split(num_mask_rois_per_img, 0)

-                    aug_masks.append(

-                        [m.sigmoid().cpu().numpy() for m in mask_pred])

+                    # aug_masks.append(

+                    #     [m.sigmoid().cpu().numpy() for m in mask_pred])



+                    aug_masks.append([m for m in mask_pred])

                 # apply mask post-processing to each image individually

                 segm_results = []

                 for i in range(num_imgs):

@@ -405,6 +406,10 @@

                         segm_results.append(segm_result)

             ms_segm_result['ensemble'] = segm_results



+        if torch.onnx.is_in_onnx_export():

+            return det_bboxes, det_labels, segm_results

+

+

         if self.with_mask:

             results = list(

                 zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble']))

@@ -413,6 +418,10 @@



         return results



+

+

+

+

     def aug_test(self, features, proposal_list, img_metas, rescale=False):

         """Test with augmentations.





--- Swin-Transformer-Object-Detection/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py	2022-08-03 17:10:43.966705203 -0900

+++ swint/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py	2022-08-03 17:04:31.182692449 -0900

@@ -245,6 +245,11 @@

         bboxes = bboxes / scale_factor



         if torch.onnx.is_in_onnx_export():

+            # for npu

+            N = len(mask_pred)

+            mask_pred = mask_pred[range(N), labels]

+            return mask_pred

+

             # TODO: Remove after F.grid_sample is supported.

             from torchvision.models.detection.roi_heads \

                 import paste_masks_in_image



--- Swin-Transformer-Object-Detection/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py	2022-08-03 17:10:43.966705203 -0900

+++ swint/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py	2022-08-03 17:04:38.434692698 -0900

@@ -3,6 +3,31 @@



 from mmdet.models.builder import ROI_EXTRACTORS

 from .base_roi_extractor import BaseRoIExtractor

+import torch.onnx.symbolic_helper as sym_help

+

+class RoiExtractor(torch.autograd.Function):

+    @staticmethod

+    def forward(self, f0, f1, f2, f3, rois, aligned=1, finest_scale=56, pooled_height=7, pooled_width=7,

+                         pool_mode='avg', roi_scale_factor=0, sample_num=0, spatial_scale=[0.25, 0.125, 0.0625, 0.03125]):

+        """

+        feats (torch.Tensor): feats in shape (batch, 256, H, W).

+        rois (torch.Tensor): rois in shape (k, 5).

+        return:

+            roi_feats (torch.Tensor): (k, 256, pooled_width, pooled_width)

+        """

+

+        # phony implementation for shape inference

+        k = rois.size()[0]

+        roi_feats = torch.ones(k, 256, pooled_height, pooled_width)

+        return roi_feats

+

+    @staticmethod

+    def symbolic(g, f0, f1, f2, f3, rois, aligned=1, finest_scale=56, pooled_height=7, pooled_width=7):

+        # TODO: support tensor list type for feats

+        #f_tensors = sym_help._unpack_list(feats)

+        roi_feats = g.op('RoiExtractor', f0, f1, f2, f3, rois, aligned_i=1, finest_scale_i=56, pooled_height_i=pooled_height, pooled_width_i=pooled_width,

+                         pool_mode_s='avg', roi_scale_factor_i=0, sample_num_i=0, spatial_scale_f=[0.25, 0.125, 0.0625, 0.03125], outputs=1)

+        return roi_feats





 @ROI_EXTRACTORS.register_module()

@@ -52,6 +77,11 @@



     @force_fp32(apply_to=('feats', ), out_fp16=True)

     def forward(self, feats, rois, roi_scale_factor=None):

+        out_size = self.roi_layers[0].output_size

+        roi_feats = RoiExtractor.apply(feats[0], feats[1], feats[2], feats[3], rois, 1, 56, out_size[0],

+                                            out_size[1])

+        return roi_feats

+

         """Forward function."""

         out_size = self.roi_layers[0].output_size

         num_levels = len(feats)