from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import cv2
from core.config import cfg
from utils.bbox import corner2center
from rknnlite.api import RKNNLite
class NnoTracker_RKNNLite(object):
def __init__(self,Tback_weight, Xback_weight, Head_weight):
self.score_size = (cfg.TRACK.INSTANCE_SIZE - cfg.TRACK.EXEMPLAR_SIZE) // \
cfg.POINT.STRIDE + 1 + cfg.TRACK.BASE_SIZE
hanning = np.hanning(self.score_size)
window = np.outer(hanning, hanning)
self.cls_out_channels = 2
self.window = window.flatten()
self.points = self.generate_points(cfg.POINT.STRIDE, self.score_size)
self.rknn_Tback = RKNNLite()
print('--> Load RKNN model')
ret = self.rknn_Tback.load_rknn(Tback_weight)
if ret != 0:
print('Load RKNN model failed')
exit(ret)
print('done')
print('--> Init runtime environment')
ret = self.rknn_Tback.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
if ret != 0:
print('Init runtime environment failed')
exit(ret)
print('done')
self.rknn_Xback = RKNNLite()
print('--> rknn_Xback: Loading model')
ret = self.rknn_Xback.load_rknn(Xback_weight)
if ret != 0:
print('rknn_Xback: Load model failed!')
exit(ret)
print('rknn_Xback:done')
print('--> Init runtime environment')
ret = self.rknn_Xback.init_runtime(core_mask=RKNNLite.NPU_CORE_1)
if ret != 0:
print('Init runtime environment failed!')
exit(ret)
print('done')
self.rknn_Head = RKNNLite()
print('--> rknn_Head: Loading model')
ret = self.rknn_Head.load_rknn(Head_weight)
if ret != 0:
print('rknn_Head: Load model failed!')
exit(ret)
print('rknn_Head:done')
print('--> Init runtime environment')
ret = self.rknn_Head.init_runtime(core_mask=RKNNLite.NPU_CORE_2)
if ret != 0:
print('Init runtime environment failed!')
exit(ret)
print('done')
def generate_points(self, stride, size):
ori = - (size // 2) * stride
x, y = np.meshgrid([ori + stride * dx for dx in np.arange(0, size)],
[ori + stride * dy for dy in np.arange(0, size)])
points = np.zeros((size * size, 2), dtype=np.float32)
points[:, 0], points[:, 1] = x.astype(np.float32).flatten(), y.astype(np.float32).flatten()
return points
def _convert_bbox(self, delta, point):
delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1)
delta = delta.detach().cpu().numpy()
delta[0, :] = point[:, 0] - delta[0, :]
delta[1, :] = point[:, 1] - delta[1, :]
delta[2, :] = point[:, 0] + delta[2, :]
delta[3, :] = point[:, 1] + delta[3, :]
delta[0, :], delta[1, :], delta[2, :], delta[3, :] = corner2center(delta)
return delta
def _convert_score(self, score):
if self.cls_out_channels == 1:
score = score.permute(1, 2, 3, 0).contiguous().view(-1)
score = score.sigmoid().detach().cpu().numpy()
else:
score = score.permute(1, 2, 3, 0).contiguous().view(self.cls_out_channels, -1).permute(1, 0)
score = score.softmax(1).detach()[:, 1].cpu().numpy()
return score
def _convert_bbox_numpy(self, delta, point):
delta = delta.transpose((1,2,3,0)).reshape(4, -1)
delta[0, :] = point[:, 0] - delta[0, :]
delta[1, :] = point[:, 1] - delta[1, :]
delta[2, :] = point[:, 0] + delta[2, :]
delta[3, :] = point[:, 1] + delta[3, :]
delta[0, :], delta[1, :], delta[2, :], delta[3, :] = corner2center(delta)
return delta
def _convert_score_numpy(self, score):
def sofmax(logits):
e_x = np.exp(logits)
probs = e_x / np.sum(e_x, axis=-1, keepdims=True)
return probs
score = score.transpose((1,2,3,0)).reshape(self.cls_out_channels, -1).transpose((1,0))
score = sofmax(score)[:,1]
return score
def _bbox_clip(self, cx, cy, width, height, boundary):
cx = max(0, min(cx, boundary[1]))
cy = max(0, min(cy, boundary[0]))
width = max(10, min(width, boundary[1]))
height = max(10, min(height, boundary[0]))
return cx, cy, width, height
def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
"""
args:
im: bgr based image
pos: center position
model_sz: exemplar size
s_z: original size
avg_chans: channel average
"""
if isinstance(pos, float):
pos = [pos, pos]
sz = original_sz
im_sz = im.shape
c = (original_sz + 1) / 2
context_xmin = np.floor(pos[0] - c + 0.5)
context_xmax = context_xmin + sz - 1
context_ymin = np.floor(pos[1] - c + 0.5)
context_ymax = context_ymin + sz - 1
left_pad = int(max(0., -context_xmin))
top_pad = int(max(0., -context_ymin))
right_pad = int(max(0., context_xmax - im_sz[1] + 1))
bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
context_xmin = context_xmin + left_pad
context_xmax = context_xmax + left_pad
context_ymin = context_ymin + top_pad
context_ymax = context_ymax + top_pad
r, c, k = im.shape
if any([top_pad, bottom_pad, left_pad, right_pad]):
size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k)
te_im = np.zeros(size, np.uint8)
te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
if top_pad:
te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
if bottom_pad:
te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
if left_pad:
te_im[:, 0:left_pad, :] = avg_chans
if right_pad:
te_im[:, c + left_pad:, :] = avg_chans
im_patch = te_im[int(context_ymin):int(context_ymax + 1),
int(context_xmin):int(context_xmax + 1), :]
else:
im_patch = im[int(context_ymin):int(context_ymax + 1),
int(context_xmin):int(context_xmax + 1), :]
if not np.array_equal(model_sz, original_sz):
im_patch = cv2.resize(im_patch, (model_sz, model_sz))
im_patch = im_patch.transpose(2, 0, 1)
im_patch = im_patch[np.newaxis, :, :, :]
im_patch = im_patch.astype(np.float32)
return im_patch
def init(self, img, bbox):
"""
args:
img(np.ndarray): BGR image
bbox: (x, y, w, h) bbox
"""
self.center_pos = np.array([bbox[0] + (bbox[2] - 1) / 2,
bbox[1] + (bbox[3] - 1) / 2])
self.size = np.array([bbox[2], bbox[3]])
w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
s_z = round(np.sqrt(w_z * h_z))
self.channel_average = np.mean(img, axis=(0, 1))
z_crop = self.get_subwindow(img, self.center_pos,
cfg.TRACK.EXEMPLAR_SIZE,
s_z, self.channel_average)
back_T_in = z_crop.transpose((0,2,3,1))
self.Toutput = self.rknn_Tback.inference(inputs=[back_T_in])
self.rknn_Tback.release()
def track(self, img):
"""
args:
img(np.ndarray): BGR image
return:
bbox(list):[x, y, width, height]
"""
w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
s_z = np.sqrt(w_z * h_z)
scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
x_crop = self.get_subwindow(img, self.center_pos,
cfg.TRACK.INSTANCE_SIZE,
round(s_x), self.channel_average)
back_X_in = x_crop.transpose((0,2,3,1))
self.Xoutput = self.rknn_Xback.inference(inputs=[back_X_in])
head_T_in = self.Toutput[0].transpose((0,2,3,1))
head_X_in = self.Xoutput[0].transpose((0,2,3,1))
outputs = self.rknn_Head.inference(inputs=[head_T_in, head_X_in])
score = self._convert_score_numpy(outputs[0])
pred_bbox = self._convert_bbox_numpy(outputs[1], self.points)
def change(r):
return np.maximum(r, 1. / r)
def sz(w, h):
pad = (w + h) * 0.5
return np.sqrt((w + pad) * (h + pad))
s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
(sz(self.size[0] * scale_z, self.size[1] * scale_z)))
r_c = change((self.size[0] / self.size[1]) /
(pred_bbox[2, :] / pred_bbox[3, :]))
penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
pscore = penalty * score
pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
self.window * cfg.TRACK.WINDOW_INFLUENCE
best_idx = np.argmax(pscore)
bbox = pred_bbox[:, best_idx] / scale_z
lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
cx = bbox[0] + self.center_pos[0]
cy = bbox[1] + self.center_pos[1]
width = self.size[0] * (1 - lr) + bbox[2] * lr
height = self.size[1] * (1 - lr) + bbox[3] * lr
cx, cy, width, height = self._bbox_clip(cx, cy, width,
height, img.shape[:2])
self.center_pos = np.array([cx, cy])
self.size = np.array([width, height])
bbox = [cx - width / 2,
cy - height / 2,
width,
height]
best_score = score[best_idx]
return {
'bbox': bbox,
'best_score': best_score
}