import torch
if torch.__version__ >= '1.8':
import torch_npu
import torch.nn as nn
class NPULabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy loss with label smoothing, built on torch_npu operators.

    Paper: [Rethinking the Inception Architecture for Computer Vision]
    https://arxiv.org/pdf/1512.00567.pdf

    The smoothed target assigns ``1 - smooth_factor`` to the labelled class
    and ``smooth_factor / (num_classes - 1)`` to every other class.

    Args:
        num_classes (int): number of classes; only used to derive the value
            assigned to the non-target classes. The one-hot depth itself is
            taken from ``pred.size(1)`` in ``forward``.
        smooth_factor (float): smoothing strength in [0, 1]. Default 0
            (plain cross entropy); 0.1 is the value suggested when label
            smoothing is wanted.

    Returns:
        ``forward`` returns the loss reduced with a float32 mean over dim 0.
    """

    def __init__(self, num_classes=1000, smooth_factor=0.):
        super(NPULabelSmoothingCrossEntropy, self).__init__()
        self.on_value = 1.0 - smooth_factor
        # With a single class there is no "other" class to receive the
        # smoothing mass; avoid dividing by zero and use 0 instead.
        other_classes = num_classes - 1
        if other_classes:
            self.off_value = 1.0 * smooth_factor / other_classes
        else:
            self.off_value = 0.0

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy of ``pred`` against ``target``.

        Args:
            pred: logits; dim 1 is used as the one-hot depth
                (assumed to be (batch, classes) - TODO confirm with callers).
            target: class indices; cast to int32 before one-hot encoding.
        """
        # Soft targets: on_value at the label position, off_value elsewhere.
        soft_target = torch_npu.npu_one_hot(
            target.int(), -1, pred.size(1), self.on_value, self.off_value
        )
        # NOTE(review): presumably yields one loss value per sample - confirm
        # against the torch_npu operator documentation.
        loss = torch_npu.npu_softmax_cross_entropy_with_logits(pred, soft_target)
        return torch.mean(loss, [0], keepdim=False, dtype=torch.float32)
class GPULabelSmoothingCrossEntropy(nn.Module):
    """Negative log-likelihood loss with label smoothing, using stock PyTorch ops.

    The per-sample loss is
    ``(1 - smooth_factor) * nll + smooth_factor * mean(-log_softmax(x))``
    and ``forward`` returns its mean over all samples.
    """

    def __init__(self, num_classes=1000, smooth_factor=0.0):
        """Store the smoothing weights.

        :param num_classes: accepted for signature parity with the NPU
            variant; not used by this implementation.
        :param smooth_factor: label smoothing factor
        """
        super(GPULabelSmoothingCrossEntropy, self).__init__()
        self.smoothing = smooth_factor
        self.confidence = 1.0 - smooth_factor

    def forward(self, x, target):
        """Return the scalar smoothed loss for logits ``x`` and labels ``target``.

        ``target`` is cast to int64 and used as a gather index along the
        last dimension of ``x``.
        """
        log_probs = torch.nn.functional.log_softmax(x, dim=-1)
        # gather needs an int64 index with a trailing axis.
        label_index = target.to(torch.int64).unsqueeze(1)
        target_nll = -log_probs.gather(dim=-1, index=label_index)
        target_nll = target_nll.squeeze(1)
        # Uniform component: average negative log-probability over classes.
        uniform_nll = -log_probs.mean(dim=-1)
        per_sample = self.confidence * target_nll + self.smoothing * uniform_nll
        return per_sample.mean()
def get_label_smoothing_cross_entropy(device, num_classes=1000, smooth_factor=0.0):
    """Build the label-smoothing loss module that matches ``device``.

    Args:
        device: device string such as ``'npu:0'`` / ``'cuda:0'`` / ``'cpu'``,
            or a ``torch.device`` object.
        num_classes: forwarded to the loss constructor.
        smooth_factor: forwarded to the loss constructor.

    Returns:
        ``NPULabelSmoothingCrossEntropy`` when the device type is ``'npu'``,
        otherwise ``GPULabelSmoothingCrossEntropy``.
    """
    # torch.device has no .split(); read its type directly so callers can
    # pass either representation.
    if isinstance(device, torch.device):
        device_type = device.type
    else:
        device_type = device.split(':')[0]

    if device_type == 'npu':
        return NPULabelSmoothingCrossEntropy(num_classes, smooth_factor)
    return GPULabelSmoothingCrossEntropy(num_classes, smooth_factor)
if __name__ == '__main__':
    # Manual smoke test for the NPU loss: it moves tensors to an NPU device,
    # so it only runs where torch.npu is available.
    logits = torch.randn(2, 10, requires_grad=True)
    labels = torch.randint(0, 10, size=(2,))

    torch.npu.set_device(0)
    logits = logits.npu()
    labels = labels.npu()

    # Plain cross entropy (smooth_factor left at its default).
    criterion = NPULabelSmoothingCrossEntropy(10)
    loss = criterion(logits, labels)
    loss.backward()
    print('test ce ok, loss is ', loss)

    # Label-smoothed cross entropy.
    criterion = NPULabelSmoothingCrossEntropy(10, 0.1)
    loss = criterion(logits, labels)
    loss.backward()
    print('test lsce ok, loss is ', loss)