@@ -1,248 +1,34 @@
# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright 2024 Huawei Technologies Co., Ltd
import math
from typing import Optional, Tuple, Union
import torch
+import torch_npu
import torch.nn as nn
-from torch.autograd import Function
-from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
+from mmcv.utils import deprecated_api_warning
+from mx_driving.fused import modulated_deform_conv2d, ModulatedDeformConv2dFunction
-from mmcv.utils import IS_MLU_AVAILABLE, deprecated_api_warning
from ..cnn import CONV_LAYERS
-from ..utils import ext_loader, print_log
-
-ext_module = ext_loader.load_ext(
- '_ext',
- ['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])
-
-
-class ModulatedDeformConv2dFunction(Function):
-
- @staticmethod
- def symbolic(g, input, offset, mask, weight, bias, stride, padding,
- dilation, groups, deform_groups):
- input_tensors = [input, offset, mask, weight]
- if bias is not None:
- input_tensors.append(bias)
- return g.op(
- 'mmcv::MMCVModulatedDeformConv2d',
- *input_tensors,
- stride_i=stride,
- padding_i=padding,
- dilation_i=dilation,
- groups_i=groups,
- deform_groups_i=deform_groups)
-
- @staticmethod
- def _calculate_sort_index(kernel_h, kernel_w, deformable_group):
- split_num = deformable_group * 2 * kernel_h * kernel_w
- sort_index = list(range(split_num))
- sort_index_fp = (sort_index[1::2] + sort_index[::2])
- sort_index_bp_dict = {i: idx for idx, i in enumerate(sort_index_fp)}
- sort_index_bp = [sort_index_bp_dict[i] for i in sort_index]
- sort_index_fp = torch.IntTensor(sort_index_fp)
- sort_index_bp = torch.IntTensor(sort_index_bp)
- sort_index_fp = sort_index_fp.npu()
- sort_index_bp = sort_index_bp.npu()
- return sort_index_fp, sort_index_bp
-
- @staticmethod
- def _npu_forward(ctx, input_tensor, offset, mask, weight, bias):
- _, _, kernel_h, kernel_w = weight.shape
- conv2d_bias = bias if len(bias) > 0 else None
- sort_index_fp, sort_index_bp = \
- ModulatedDeformConv2dFunction._calculate_sort_index(
- kernel_w, kernel_h, ctx.deform_groups)
- select_offset = offset.index_select(1, sort_index_fp)
- offset_all = torch.cat([select_offset, mask], dim=1)
- import torch_npu
- output, offset_out = torch_npu.npu_deformable_conv2d(
- input_tensor,
- weight,
- offset_all,
- conv2d_bias,
- kernel_size=[kernel_w, kernel_h],
- stride=[1, 1, ctx.stride[0], ctx.stride[1]],
- padding=[
- ctx.padding[0], ctx.padding[0], ctx.padding[1], ctx.padding[1]
- ],
- dilation=[1, 1, ctx.dilation[0], ctx.dilation[1]],
- groups=ctx.groups,
- deformable_groups=ctx.deform_groups,
- modulated=True)
- if weight.requires_grad or mask.requires_grad or offset.requires_grad \
- or input_tensor.requires_grad:
- ctx.save_for_backward(input_tensor, weight, offset_out, offset_all,
- sort_index_bp)
- return output
-
- @staticmethod
- def _npu_backward(ctx, grad_output):
- input_tensor, weight, offset_out, offset_all, sort_index_bp = \
- ctx.saved_tensors
- import torch_npu
- grad_input, grad_weight, grad_offset_all, grad_bias = \
- torch_npu.npu_deformable_conv2dbk(
- input_tensor, grad_output, offset_out, weight, offset_all,
- kernel_size=[weight.shape[3], weight.shape[2]],
- stride=[1, 1, ctx.stride[0], ctx.stride[1]],
- padding=[ctx.padding[0], ctx.padding[0], ctx.padding[1],
- ctx.padding[1]],
- dilation=[1, 1, ctx.dilation[0], ctx.dilation[1]],
- groups=ctx.groups, deformable_groups=ctx.deform_groups,
- modulated=True)
- grad_offset = grad_offset_all.index_select(1, sort_index_bp)
- grad_mask = grad_offset_all[:, grad_offset.shape[1]:, :, :]
- if not ctx.with_bias:
- grad_bias = None
- return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
- None, None, None, None, None, None, None, None)
-
- @staticmethod
- def forward(ctx,
- input: torch.Tensor,
- offset: torch.Tensor,
- mask: torch.Tensor,
- weight: nn.Parameter,
- bias: Optional[nn.Parameter] = None,
- stride: int = 1,
- padding: int = 0,
- dilation: int = 1,
- groups: int = 1,
- deform_groups: int = 1) -> torch.Tensor:
- if input is not None and input.dim() != 4:
- raise ValueError(
- f'Expected 4D tensor as input, got {input.dim()}D tensor \
- instead.')
- ctx.stride = _pair(stride)
- ctx.padding = _pair(padding)
- ctx.dilation = _pair(dilation)
- ctx.groups = groups
- ctx.deform_groups = deform_groups
- ctx.with_bias = bias is not None
- ctx.device = input.device.type
- if not ctx.with_bias:
- bias = input.new_empty(0) # fake tensor
- # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
- # amp won't cast the type of model (float32), but "offset" is cast
- # to float16 by nn.Conv2d automatically, leading to the type
- # mismatch with input (when it is float32) or weight.
- # The flag for whether to use fp16 or amp is the type of "offset",
- # we cast weight and input to temporarily support fp16 and amp
- # whatever the pytorch version is.
- input = input.type_as(offset)
- weight = weight.type_as(input)
- bias = bias.type_as(input) # type: ignore
- mask = mask.type_as(input)
- if ctx.device == 'npu':
- output = ModulatedDeformConv2dFunction._npu_forward(
- ctx, input, offset, mask, weight, bias)
- return output
- ctx.save_for_backward(input, offset, mask, weight, bias)
- output = input.new_empty(
- ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
- ctx._bufs = [input.new_empty(0), input.new_empty(0)]
- ext_module.modulated_deform_conv_forward(
- input,
- weight,
- bias,
- ctx._bufs[0],
- offset,
- mask,
- output,
- ctx._bufs[1],
- kernel_h=weight.size(2),
- kernel_w=weight.size(3),
- stride_h=ctx.stride[0],
- stride_w=ctx.stride[1],
- pad_h=ctx.padding[0],
- pad_w=ctx.padding[1],
- dilation_h=ctx.dilation[0],
- dilation_w=ctx.dilation[1],
- group=ctx.groups,
- deformable_group=ctx.deform_groups,
- with_bias=ctx.with_bias)
- return output
-
- @staticmethod
- @once_differentiable
- def backward(ctx, grad_output: torch.Tensor) -> tuple:
- if ctx.device == 'npu':
- return ModulatedDeformConv2dFunction._npu_backward(
- ctx, grad_output)
- input, offset, mask, weight, bias = ctx.saved_tensors
- grad_input = torch.zeros_like(input)
- grad_offset = torch.zeros_like(offset)
- grad_mask = torch.zeros_like(mask)
- grad_weight = torch.zeros_like(weight)
- grad_bias = torch.zeros_like(bias)
- grad_output = grad_output.contiguous()
- ext_module.modulated_deform_conv_backward(
- input,
- weight,
- bias,
- ctx._bufs[0],
- offset,
- mask,
- ctx._bufs[1],
- grad_input,
- grad_weight,
- grad_bias,
- grad_offset,
- grad_mask,
- grad_output,
- kernel_h=weight.size(2),
- kernel_w=weight.size(3),
- stride_h=ctx.stride[0],
- stride_w=ctx.stride[1],
- pad_h=ctx.padding[0],
- pad_w=ctx.padding[1],
- dilation_h=ctx.dilation[0],
- dilation_w=ctx.dilation[1],
- group=ctx.groups,
- deformable_group=ctx.deform_groups,
- with_bias=ctx.with_bias)
- if not ctx.with_bias:
- grad_bias = None
-
- return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
- None, None, None, None, None)
-
- @staticmethod
- def _output_size(ctx, input, weight):
- channels = weight.size(0)
- output_size = (input.size(0), channels)
- for d in range(input.dim() - 2):
- in_size = input.size(d + 2)
- pad = ctx.padding[d]
- kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
- stride_ = ctx.stride[d]
- output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
- if not all(map(lambda s: s > 0, output_size)):
- raise ValueError(
- 'convolution input is too small (output would be ' +
- 'x'.join(map(str, output_size)) + ')')
- return output_size
-
-
-modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply
+from ..utils import print_log
class ModulatedDeformConv2d(nn.Module):
- @deprecated_api_warning({'deformable_groups': 'deform_groups'},
- cls_name='ModulatedDeformConv2d')
- def __init__(self,
- in_channels: int,
- out_channels: int,
- kernel_size: Union[int, Tuple[int]],
- stride: int = 1,
- padding: int = 0,
- dilation: int = 1,
- groups: int = 1,
- deform_groups: int = 1,
- bias: Union[bool, str] = True):
+ @deprecated_api_warning({"deformable_groups": "deform_groups"}, cls_name="ModulatedDeformConv2d")
+ def __init__(
+ self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: Union[int, Tuple[int]],
+ stride: int = 1,
+ padding: int = 0,
+ dilation: int = 1,
+ groups: int = 1,
+ deform_groups: int = 1,
+ bias: Union[bool, str] = True,
+ ):
super().__init__()
self.in_channels = in_channels
self.out_channels = out_channels
@@ -256,33 +42,38 @@ class ModulatedDeformConv2d(nn.Module):
self.transposed = False
self.output_padding = _single(0)
- self.weight = nn.Parameter(
- torch.Tensor(out_channels, in_channels // groups,
- *self.kernel_size))
+ self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels // groups, *self.kernel_size))
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
else:
- self.register_parameter('bias', None)
+ self.register_parameter("bias", None)
self.init_weights()
def init_weights(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
- stdv = 1. / math.sqrt(n)
+ stdv = 1.0 / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.zero_()
- def forward(self, x: torch.Tensor, offset: torch.Tensor,
- mask: torch.Tensor) -> torch.Tensor:
- return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
- self.stride, self.padding,
- self.dilation, self.groups,
- self.deform_groups)
+ def forward(self, x: torch.Tensor, offset: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+ return modulated_deform_conv2d(
+ x,
+ offset,
+ mask,
+ self.weight,
+ self.bias,
+ self.stride,
+ self.padding,
+ self.dilation,
+ self.groups,
+ self.deform_groups,
+ )
-@CONV_LAYERS.register_module('DCNv2')
+@CONV_LAYERS.register_module("DCNv2")
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
"""A ModulatedDeformable Conv Encapsulation that acts as normal Conv
layers.
@@ -311,115 +102,53 @@ class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
- bias=True)
+ bias=True,
+ )
self.init_weights()
def init_weights(self) -> None:
super().init_weights()
- if hasattr(self, 'conv_offset'):
+ if hasattr(self, "conv_offset"):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x: torch.Tensor) -> torch.Tensor: # type: ignore
out = self.conv_offset(x)
- o1, o2, mask = torch.chunk(out, 3, dim=1)
- offset = torch.cat((o1, o2), dim=1)
+ len1 = ((out.shape[1] + 2) // 3) * 2
+ len2 = out.shape[1] - len1
+ offset, mask = torch.split(out, [len1, len2], dim=1)
mask = torch.sigmoid(mask)
- return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
- self.stride, self.padding,
- self.dilation, self.groups,
- self.deform_groups)
-
- def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
- missing_keys, unexpected_keys, error_msgs):
- version = local_metadata.get('version', None)
+ return modulated_deform_conv2d(
+ x,
+ offset,
+ mask,
+ self.weight,
+ self.bias,
+ self.stride,
+ self.padding,
+ self.dilation,
+ self.groups,
+ self.deform_groups,
+ )
+
+ # pylint: disable=huawei-too-many-arguments
+ def _load_from_state_dict(
+ self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+ ):
+ version = local_metadata.get("version", None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, ModulatedDeformConvPack
# loads previous benchmark models.
- if (prefix + 'conv_offset.weight' not in state_dict
- and prefix[:-1] + '_offset.weight' in state_dict):
- state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
- prefix[:-1] + '_offset.weight')
- if (prefix + 'conv_offset.bias' not in state_dict
- and prefix[:-1] + '_offset.bias' in state_dict):
- state_dict[prefix +
- 'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
- '_offset.bias')
+ if prefix + "conv_offset.weight" not in state_dict and prefix[:-1] + "_offset.weight" in state_dict:
+ state_dict[prefix + "conv_offset.weight"] = state_dict.pop(prefix[:-1] + "_offset.weight")
+ if prefix + "conv_offset.bias" not in state_dict and prefix[:-1] + "_offset.bias" in state_dict:
+ state_dict[prefix + "conv_offset.bias"] = state_dict.pop(prefix[:-1] + "_offset.bias")
if version is not None and version > 1:
- print_log(
- f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
- 'version 2.',
- logger='root')
-
- super()._load_from_state_dict(state_dict, prefix, local_metadata,
- strict, missing_keys, unexpected_keys,
- error_msgs)
-
-
-if IS_MLU_AVAILABLE:
- import torchvision
- from torchvision.ops import deform_conv2d as tv_deform_conv2d
-
- from mmcv.utils import digit_version
-
- @CONV_LAYERS.register_module('DCNv2', force=True)
- class ModulatedDeformConv2dPack_MLU(ModulatedDeformConv2d):
- """This class is the DCNv2 implementation of the MLU device. The MLU
- backend support of the operator has been implemented in torchvision.
- The mmcv registration mechanism is used for multiplexing here. The
- torchvision implementation of DCNv2 is called.
-
- Args:
- in_channels (int): Same as nn.Conv2d.
- out_channels (int): Same as nn.Conv2d.
- kernel_size (int or tuple[int]): Same as nn.Conv2d.
- stride (int): Same as nn.Conv2d, while tuple is not supported.
- padding (int): Same as nn.Conv2d, while tuple is not supported.
- dilation (int): Same as nn.Conv2d, while tuple is not supported.
- groups (int): Same as nn.Conv2d.
- bias (bool or str): If specified as `auto`, it will be decided by
- the norm_cfg. Bias will be set as True if norm_cfg is None,
- otherwise False.
- """
-
- def __init__(self, *args, **kwargs):
- assert digit_version(torchvision.__version__) >= digit_version(
- '0.10.0a0'), 'the version of torchvision should be >= 0.10.0'
- super().__init__(*args, **kwargs)
- self.conv_offset = nn.Conv2d(
- self.in_channels,
- self.deform_groups * 3 * self.kernel_size[0] *
- self.kernel_size[1],
- kernel_size=self.kernel_size,
- stride=self.stride,
- padding=self.padding,
- dilation=self.dilation,
- bias=True)
- self.init_weights()
-
- def init_weights(self):
- super().init_weights()
- if hasattr(self, 'conv_offset'):
- self.conv_offset.weight.data.zero_()
- self.conv_offset.bias.data.zero_()
+ print_log(f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' "version 2.", logger="root")
- def forward(self, x):
- out = self.conv_offset(x)
- o1, o2, mask = torch.chunk(out, 3, dim=1)
- offset = torch.cat((o1, o2), dim=1)
- mask = torch.sigmoid(mask)
- x = x.type_as(offset)
- weight = self.weight.type_as(x)
- mask = mask.type_as(x)
- return tv_deform_conv2d(
- x,
- offset,
- weight,
- bias=self.bias,
- stride=self.stride,
- padding=self.padding,
- dilation=self.dilation,
- mask=mask)
+ super()._load_from_state_dict(
+ state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+ )
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
+# Copyright 2024 Huawei Technologies Co., Ltd
from typing import Any, List, Tuple
import torch
@@ -156,8 +157,7 @@ class MMDistributedDataParallel(DistributedDataParallel):
Returns:
Any: Forward result of :attr:`module`.
"""
- module_to_run = self._replicated_tensor_module if \
- self._use_replicated_tensor_module else self.module
+ module_to_run = self.module
if self.device_ids:
inputs, kwargs = self.to_kwargs( # type: ignore
@@ -1,4 +1,19 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Copyright (c) OpenMMLab. All rights reserved.
+import os
import os.path as osp
import platform
import shutil
@@ -15,6 +30,21 @@ from .builder import RUNNERS
from .checkpoint import save_checkpoint
from .utils import get_host_info
+try:
+ from torch_npu.utils.profiler import Profile
+except ImportError:
+ print("Profile not in torch_npu.utils.profiler now.. Auto Profile disabled.", flush=True)
+
+ class Profile:
+ def __init__(self, *args, **kwargs):
+ pass
+
+ def start(self):
+ pass
+
+ def end(self):
+ pass
+
@RUNNERS.register_module()
class EpochBasedRunner(BaseRunner):
@@ -46,7 +76,10 @@ class EpochBasedRunner(BaseRunner):
self._max_iters = self._max_epochs * len(self.data_loader)
self.call_hook('before_train_epoch')
time.sleep(2) # Prevent possible deadlock during epoch transition
+ profile = Profile(start_step=int(os.getenv('PROFILE_START_STEP', 10)),
+ profile_type=os.getenv('PROFILE_TYPE'))
for i, data_batch in enumerate(self.data_loader):
+ profile.start()
self.data_batch = data_batch
self._inner_iter = i
self.call_hook('before_train_iter')
@@ -54,6 +87,7 @@ class EpochBasedRunner(BaseRunner):
self.call_hook('after_train_iter')
del self.data_batch
self._iter += 1
+ profile.end()
self.call_hook('after_train_epoch')
self._epoch += 1
@@ -194,4 +228,4 @@ class Runner(EpochBasedRunner):
warnings.warn(
'Runner was deprecated, please use EpochBasedRunner instead',
DeprecationWarning)
- super().__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
\ No newline at end of file
@@ -1,3 +1,17 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import logging
@@ -52,11 +66,11 @@ class OptimizerHook(Hook):
self.grad_clip = grad_clip
self.detect_anomalous_params = detect_anomalous_params
- def clip_grads(self, params):
+ def clip_grads(self, params, runner):
params = list(
filter(lambda p: p.requires_grad and p.grad is not None, params))
if len(params) > 0:
- return clip_grad.clip_grad_norm_(params, **self.grad_clip)
+ return runner.optimizer.clip_grad_norm_fused_(**self.grad_clip)
def after_train_iter(self, runner):
runner.optimizer.zero_grad()
@@ -65,7 +79,7 @@ class OptimizerHook(Hook):
runner.outputs['loss'].backward()
if self.grad_clip is not None:
- grad_norm = self.clip_grads(runner.model.parameters())
+ grad_norm = self.clip_grads(runner.model.parameters(), runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
@@ -182,7 +196,7 @@ class GradientCumulativeOptimizerHook(OptimizerHook):
or self.is_last_iter(runner)):
if self.grad_clip is not None:
- grad_norm = self.clip_grads(runner.model.parameters())
+ grad_norm = self.clip_grads(runner.model.parameters(), runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
@@ -291,7 +305,7 @@ if (TORCH_VERSION != 'parrots'
self.loss_scaler.unscale_(runner.optimizer)
# grad clip
if self.grad_clip is not None:
- grad_norm = self.clip_grads(runner.model.parameters())
+ grad_norm = self.clip_grads(runner.model.parameters(), runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
@@ -331,7 +345,7 @@ if (TORCH_VERSION != 'parrots'
self.loss_scaler.unscale_(runner.optimizer)
if self.grad_clip is not None:
- grad_norm = self.clip_grads(runner.model.parameters())
+ grad_norm = self.clip_grads(runner.model.parameters(), runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
@@ -477,7 +491,7 @@ else:
if param.grad is not None:
param.grad.div_(self.loss_scaler.loss_scale)
if self.grad_clip is not None:
- grad_norm = self.clip_grads(fp32_weights)
+ grad_norm = self.clip_grads(fp32_weights, runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
@@ -534,7 +548,7 @@ else:
if param.grad is not None:
param.grad.div_(self.loss_scaler.loss_scale)
if self.grad_clip is not None:
- grad_norm = self.clip_grads(fp32_weights)
+ grad_norm = self.clip_grads(fp32_weights, runner)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
@@ -557,4 +571,4 @@ else:
# clear grads
runner.model.zero_grad()
- runner.optimizer.zero_grad()
+ runner.optimizer.zero_grad()
\ No newline at end of file