import torch
if torch.__version__ >= '1.8':
import torch_npu
def box_dtype_check(box):
if box not in [torch.float, torch.half]:
return box.float()
def npu_ptiou(boxes1, boxes2):
"""
Given two lists of boxes of size N and M,
compute the IoU (intersection over union)
between __all__ N x M pairs of boxes.
The box order must be (xmin, ymin, xmax, ymax).
Compute Function: insect_area / (union_area + 0.001)
Args:
boxes1(N,4),boxes2(M,4): two `Boxes`. Contains N & M boxes, respectively. Support dtype: float, half.
Returns:
Tensor: IoU, sized [N,M].
"""
boxes1 = box_dtype_check(boxes1)
boxes2 = box_dtype_check(boxes2)
out = torch_npu.npu_ptiou(boxes2, boxes1)
return out
def npu_iou(boxes1, boxes2):
"""
Given two lists of boxes of size N and M,
compute the IoU (intersection over union)
between __all__ N x M pairs of boxes.
The box order must be (xmin, ymin, xmax, ymax).
Compute Function: (insect_area + 0.001) / (union_area + 0.001)
Args:
boxes1(N,4),boxes2(M,4): two `Boxes`. Contains N & M boxes, respectively. Support dtype: float, half.
Returns:
Tensor: IoU, sized [N,M].
"""
boxes1 = box_dtype_check(boxes1)
boxes2 = box_dtype_check(boxes2)
out = torch_npu.npu_iou(boxes2, boxes1)
return out
def point_form(boxes):
""" Convert prior_boxes to (xmin, ymin, xmax, ymax)
representation for comparison to point form ground truth data.
Args:
boxes: (tensor) center-size default boxes from priorbox layers.
Return:
boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
"""
return torch.cat((boxes[:, :2] - boxes[:, 2:]/2,
boxes[:, :2] + boxes[:, 2:]/2), 1)
def center_size(boxes):
""" Convert prior_boxes to (cx, cy, w, h)
representation for comparison to center-size form ground truth data.
Args:
boxes: (tensor) point_form boxes
Return:
boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
"""
return torch.cat([(boxes[:, 2:] + boxes[:, :2])/2,
boxes[:, 2:] - boxes[:, :2]], 1)
def intersect(box_a, box_b):
""" We resize both tensors to [A,B,2] without new malloc:
[A,2] -> [A,1,2] -> [A,B,2]
[B,2] -> [1,B,2] -> [A,B,2]
Then we compute the area of intersect between box_a and box_b.
Args:
box_a: (tensor) bounding boxes, Shape: [A,4].
box_b: (tensor) bounding boxes, Shape: [B,4].
Return:
(tensor) intersection area, Shape: [A,B].
"""
A = box_a.size(0)
B = box_b.size(0)
max_xy = torch.min(box_a[:, 2:].contiguous().unsqueeze(1).expand(A, B, 2),
box_b[:, 2:].contiguous().unsqueeze(0).expand(A, B, 2))
min_xy = torch.max(box_a[:, :2].contiguous().unsqueeze(1).expand(A, B, 2),
box_b[:, :2].contiguous().unsqueeze(0).expand(A, B, 2))
inter = torch.clamp((max_xy - min_xy), min=0)
return inter[:, :, 0] * inter[:, :, 1]
def jaccard(box_a, box_b):
"""Compute the jaccard overlap of two sets of boxes. The jaccard overlap
is simply the intersection over union of two boxes. Here we operate on
ground truth boxes and default boxes.
E.g.:
A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
Args:
box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
Return:
jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
"""
inter = intersect(box_a, box_b)
area_a = ((box_a[:, 2]-box_a[:, 0]) *
(box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)
area_b = ((box_b[:, 2]-box_b[:, 0]) *
(box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)
union = area_a + area_b - inter
return inter / union
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
"""Match each prior box with the ground truth box of the highest jaccard
overlap, encode the bounding boxes, then return the matched indices
corresponding to both confidence and location preds.
Args:
threshold: (float) The overlap threshold used when mathing boxes.
truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors].
priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
variances: (tensor) Variances corresponding to each prior coord,
Shape: [num_priors, 4].
labels: (tensor) All the class labels for the image, Shape: [num_obj].
loc_t: (tensor) Tensor to be filled w/ endcoded location targets.
conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
idx: (int) current batch index
Return:
The matched indices corresponding to 1)location and 2)confidence preds.
"""
overlaps = jaccard(
truths,
point_form(priors)
)
best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
best_truth_idx.squeeze_(0)
best_truth_overlap.squeeze_(0)
best_prior_idx.squeeze_(1)
best_prior_overlap.squeeze_(1)
best_truth_overlap.index_fill_(0, best_prior_idx, 2)
for j in range(best_prior_idx.size(0)):
best_truth_idx[best_prior_idx[j]] = j
matches = truths[best_truth_idx]
conf = labels[best_truth_idx] + 1
conf[best_truth_overlap < threshold] = 0
loc = encode(matches, priors, variances)
loc_t[idx] = loc
conf_t[idx] = conf
def refine_match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx, arm_loc=None, num_gt=None):
"""Match each arm bbox with the ground truth box of the highest jaccard
overlap, encode the bounding boxes, then return the matched indices
corresponding to both confidence and location preds.
Args:
threshold: (float) The overlap threshold used when mathing boxes.
truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors].
priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
variances: (tensor) Variances corresponding to each prior coord,
Shape: [num_priors, 4].
labels: (tensor) All the class labels for the image, Shape: [num_obj].
loc_t: (tensor) Tensor to be filled w/ endcoded location targets.
conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
idx: (int) current batch index
Return:
The matched indices corresponding to 1)location and 2)confidence preds.
"""
if arm_loc is None:
overlaps = npu_ptiou(truths * 500, point_form(priors) * 500)
else:
decode_arm = decode(arm_loc, priors=priors, variances=variances)
overlaps = npu_ptiou(truths * 500, decode_arm * 500)
best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
best_truth_idx.squeeze_(0)
best_truth_overlap.squeeze_(0)
best_prior_idx.squeeze_(1)
best_prior_overlap.squeeze_(1)
best_prior_idx[num_gt:] = best_prior_idx[0]
best_truth_overlap[best_prior_idx] = 2
for j in range(num_gt):
best_truth_idx[best_prior_idx[j]] = j
matches = truths[best_truth_idx]
if arm_loc is None:
conf = labels[best_truth_idx]
loc = encode(matches, priors, variances)
else:
conf = labels[best_truth_idx] + 1
loc = encode(matches, center_size(decode_arm), variances)
conf[best_truth_overlap < threshold] = 0
loc_t[idx] = loc
conf_t[idx] = conf
def encode(matched, priors, variances):
"""Encode the variances from the priorbox layers into the ground truth boxes
we have matched (based on jaccard overlap) with the prior boxes.
Args:
matched: (tensor) Coords of ground truth for each prior in point-form
Shape: [num_priors, 4].
priors: (tensor) Prior boxes in center-offset form
Shape: [num_priors,4].
variances: (list[float]) Variances of priorboxes
Return:
encoded boxes (tensor), Shape: [num_priors, 4]
"""
g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
g_cxcy /= (variances[0] * priors[:, 2:])
g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
g_wh = torch.log(g_wh + 1e-5) / variances[1]
return torch.cat([g_cxcy, g_wh], 1)
def decode(loc, priors, variances):
"""Decode locations from predictions using priors to undo
the encoding we did for offset regression at train time.
Args:
loc (tensor): location predictions for loc layers,
Shape: [num_priors,4]
priors (tensor): Prior boxes in center-offset form.
Shape: [num_priors,4].
variances: (list[float]) Variances of priorboxes
Return:
decoded bounding box predictions
"""
if priors.device != loc.device:
priors = priors.npu()
boxes = torch.cat((
priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
boxes[:, :2] -= boxes[:, 2:] / 2
boxes[:, 2:] += boxes[:, :2]
return boxes
def log_sum_exp(x):
"""Utility function for computing log_sum_exp while determining
This will be used to determine unaveraged confidence loss across
all examples in a batch.
Args:
x (Variable(tensor)): conf_preds from conf layers
"""
x_max = x.data.max()
return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
def print_info(x, name):
print('*' * 20)
print(x)
print(name + '.shape: ', x.shape)
print(name + '.dtype: ', x.dtype)
print(name + '.device: ', x.device)
print('-' * 20)
def nms(boxes, scores, overlap=0.5, top_k=200):
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
Args:
boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
scores: (tensor) The class predscores for the img, Shape:[num_priors].
overlap: (float) The overlap thresh for suppressing unnecessary boxes.
top_k: (int) The Maximum number of box preds to consider.
Return:
The indices of the kept boxes with respect to num_priors.
"""
keep = scores.new(scores.size(0)).zero_().long()
if boxes.numel() == 0:
return keep
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
area = torch.mul(x2 - x1, y2 - y1)
v, idx = scores.sort(0)
idx = idx[-top_k:]
xx1 = boxes.new()
yy1 = boxes.new()
xx2 = boxes.new()
yy2 = boxes.new()
w = boxes.new()
h = boxes.new()
count = 0
while idx.numel() > 0:
i = idx[-1]
keep[count] = i
count += 1
if idx.size(0) == 1:
break
idx = idx[:-1]
idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
x1 = torch.autograd.Variable(x1, requires_grad=False)
x1 = x1.data
y1 = torch.autograd.Variable(y1, requires_grad=False)
y1 = y1.data
x2 = torch.autograd.Variable(x2, requires_grad=False)
x2 = x2.data
y2 = torch.autograd.Variable(y2, requires_grad=False)
y2 = y2.data
xx1 = torch.index_select(x1, 0, idx)
yy1 = torch.index_select(y1, 0, idx)
xx2 = torch.index_select(x2, 0, idx)
yy2 = torch.index_select(y2, 0, idx)
xx1 = torch.clamp(xx1, min=x1[i])
yy1 = torch.clamp(yy1, min=y1[i])
xx2 = torch.clamp(xx2, max=x2[i])
yy2 = torch.clamp(yy2, max=y2[i])
w.resize_as_(xx2)
h.resize_as_(yy2)
w = xx2 - xx1
h = yy2 - yy1
w = torch.clamp(w, min=0.0)
h = torch.clamp(h, min=0.0)
inter = w*h
area = torch.autograd.Variable(area, requires_grad=False)
area = area.data
idx = torch.autograd.Variable(idx, requires_grad=False)
idx = idx.data
rem_areas = torch.index_select(area, 0, idx)
union = (rem_areas - inter) + area[i]
IoU = inter/union
idx = idx[IoU.le(overlap)]
return keep, count