1c3ad5c5创建于 2020年3月4日历史提交
#include "sipeed_yolo2.h"
#include "sipeed_kpu.h"
#include <stdlib.h>
#include <math.h>
#include "printf.h"

// #include "lcd.h"

/* start of region_layer.c*/

typedef struct
{
    float x;
    float y;
    float w;
    float h;
} __attribute__((aligned(8))) box_t;

typedef struct
{
    int index;
    int class;
    float **probs;
} __attribute__((aligned(8))) sortable_box_t;


int region_layer_init(region_layer_t *rl, void* ctx)
{
    int flag = 0;
	uint16_t wi,hi,chi;
	uint16_t wo,ho,cho;
	size_t size;
	int kmodel_type=sipeed_kpu_model_get_type(ctx);
	
	
	if(sipeed_kpu_model_get_input_shape(ctx, &wi, &hi, &chi) != SIPEED_KPU_ERR_NONE)
	{
		// mp_printf(&mp_plat_print, "[MAIXPY]rl: first layer not conv layer!\r\n");
		return -1;
	}
	
	if(sipeed_kpu_model_get_output_shape(ctx, &wo, &ho, &cho) != SIPEED_KPU_ERR_NONE)
	{
		// mp_printf(&mp_plat_print, "[MAIXPY]rl: can't fetch last layer!\r\n");
		return -1;
	}

    rl->coords = 4;
    rl->image_width = wi;
    rl->image_height = hi;

    rl->classes = cho / 5 - 5;
    rl->net_width = wi;
    rl->net_height = hi;
    rl->layer_width = wo;
    rl->layer_height = ho;
    rl->boxes_number = (rl->layer_width * rl->layer_height * rl->anchor_number);
    rl->output_number = (rl->boxes_number * (rl->classes + rl->coords + 1));
    
	sipeed_kpu_get_output(ctx, 0, &(rl->output), &size);	
	
	//module output -> rl output
	//mp_printf(&mp_plat_print, "size=%ld\r\n",size);
    //rl->scale = output_scale;
    //rl->bias = output_bias;

    // Initialize for out-of-memory situations
    rl->probs_buf = NULL;
    rl->probs = NULL;

    /*rl->output = malloc(rl->output_number * sizeof(float));
    if (rl->output == NULL)
    {
        flag = -1;
        goto malloc_error;
    }*/
    rl->boxes = malloc(rl->boxes_number * sizeof(box_t));
    if (rl->boxes == NULL)
    {
        flag = -2;
        goto malloc_error;
    }
    rl->probs_buf = malloc(rl->boxes_number * (rl->classes + 1) * sizeof(float));
    if (rl->probs_buf == NULL)
    {
        flag = -3;
        goto malloc_error;
    }
    rl->probs = malloc(rl->boxes_number * sizeof(float *));
    if (rl->probs == NULL)
    {
        flag = -4;
        goto malloc_error;
    }
    /*rl->activate = malloc(256 * sizeof(float));
    if (rl->activate == NULL)
    {
        flag = -5;
        goto malloc_error;
    }
    rl->softmax = malloc(256 * sizeof(float));
    if (rl->softmax == NULL)
    {
        flag = -5;
        goto malloc_error;
    }
    for (int i = 0; i < 256; i++)
    {
        rl->activate[i] = 1.0 / (1.0 + expf(-(i * rl->scale + rl->bias)));
        rl->softmax[i] = expf(rl->scale * (i - 255));
    }*/
    for (uint32_t i = 0; i < rl->boxes_number; i++){
	rl->probs[i] = &(rl->probs_buf[i * (rl->classes + 1)]);}
	
	
    return 0;
malloc_error:
    //free(rl->output);
    free(rl->boxes);
    free(rl->probs_buf);
    free(rl->probs);
    //free(rl->activate);
    //free(rl->softmax);
    return flag;
}

void region_layer_deinit(region_layer_t *rl)
{
    //free(rl->output);
    free(rl->boxes);
    free(rl->probs_buf);
    free(rl->probs);
    //free(rl->activate);
    //free(rl->softmax);
}

static void activate_array(region_layer_t *rl, int index, int n)
{
    float *output = &rl->output[index];
    //uint8_t *input = &rl->input[index];

    for (int i = 0; i < n; ++i)
        output[i] = 1.0 / (1.0 + expf(-output[i]));//rl->activate[input[i]];
}

static int entry_index(region_layer_t *rl, int location, int entry)
{
    int wh = rl->layer_width * rl->layer_height;
    int n = location / wh;
    int loc = location % wh;

    return n * wh * (rl->coords + rl->classes + 1) + entry * wh + loc;
}

static void softmax(float *data, int n, int stride)
{
    int i;
    // int diff;
    // float e;
    float sum = 0;
    float largest_i = data[0];

    for (i = 0; i < n; ++i)
    {
        if (data[i * stride] > largest_i)
            largest_i = data[i * stride];
    }
    for (i = 0; i < n; ++i)
    {
        float value = expf(data[i * stride] - largest_i);
        sum += value;
        data[i * stride] = value;
    }
    for (i = 0; i < n; ++i)
	{
        data[i * stride] /= sum;
	}
}

static void softmax_cpu(float *data, int n, int batch, int batch_offset, int groups, int stride)
{
    int g, b;

    for (b = 0; b < batch; ++b)
    {
        for (g = 0; g < groups; ++g)
            softmax(data + b * batch_offset + g, n, stride);
    }
}

static void forward_region_layer(region_layer_t *rl)
{
    int index;

    //for (index = 0; index < rl->output_number; index++)
    //    rl->output[index] = rl->input[index] * rl->scale + rl->bias;

    for (int n = 0; n < rl->anchor_number; ++n)
    {
        index = entry_index(rl, n * rl->layer_width * rl->layer_height, 0);
        activate_array(rl, index, 2 * rl->layer_width * rl->layer_height);
        index = entry_index(rl, n * rl->layer_width * rl->layer_height, 4);
        activate_array(rl, index, rl->layer_width * rl->layer_height);
    }

    index = entry_index(rl, 0, rl->coords + 1);
	softmax_cpu(rl->output + index, rl->classes, rl->anchor_number,\
		rl->output_number / rl->anchor_number, rl->layer_width * rl->layer_height,\
		rl->layer_width * rl->layer_height);
	
}

static void correct_region_boxes(region_layer_t *rl, box_t *boxes)
{
    uint32_t net_width = rl->net_width;
    uint32_t net_height = rl->net_height;
    uint32_t image_width = rl->image_width;
    uint32_t image_height = rl->image_height;
    uint32_t boxes_number = rl->boxes_number;
    int new_w = 0;
    int new_h = 0;

    if (((float)net_width / image_width) <
        ((float)net_height / image_height))
    {
        new_w = net_width;
        new_h = (image_height * net_width) / image_width;
    }
    else
    {
        new_h = net_height;
        new_w = (image_width * net_height) / image_height;
    }
    for (int i = 0; i < boxes_number; ++i)
    {
        box_t b = boxes[i];

        b.x = (b.x - (net_width - new_w) / 2. / net_width) /
              ((float)new_w / net_width);
        b.y = (b.y - (net_height - new_h) / 2. / net_height) /
              ((float)new_h / net_height);
        b.w *= (float)net_width / new_w;
        b.h *= (float)net_height / new_h;
        boxes[i] = b;
    }
}

static box_t get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
{
    volatile box_t b;

    b.x = (i + x[index + 0 * stride]) / w;
    b.y = (j + x[index + 1 * stride]) / h;
    b.w = expf(x[index + 2 * stride]) * biases[2 * n] / w;
    b.h = expf(x[index + 3 * stride]) * biases[2 * n + 1] / h;
    return b;
}

static void get_region_boxes(region_layer_t *rl, float *predictions, float **probs, box_t *boxes)
{
    uint32_t layer_width = rl->layer_width;
    uint32_t layer_height = rl->layer_height;
    uint32_t anchor_number = rl->anchor_number;
    uint32_t classes = rl->classes;
    uint32_t coords = rl->coords;
    float threshold = rl->threshold;

    for (int i = 0; i < layer_width * layer_height; ++i)
    {
        int row = i / layer_width;
        int col = i % layer_width;

        for (int n = 0; n < anchor_number; ++n)
        {
            int index = n * layer_width * layer_height + i;

            for (int j = 0; j < classes; ++j)
                probs[index][j] = 0;
            int obj_index = entry_index(rl, n * layer_width * layer_height + i, coords);
            int box_index = entry_index(rl, n * layer_width * layer_height + i, 0);
            float scale = predictions[obj_index];

            boxes[index] = get_region_box(predictions, rl->anchor, n, box_index, col, row,
                                          layer_width, layer_height, layer_width * layer_height);

            float max = 0;

            for (int j = 0; j < classes; ++j)
            {
                int class_index = entry_index(rl, n * layer_width * layer_height + i, coords + 1 + j);
                float prob = scale * predictions[class_index];

                probs[index][j] = (prob > threshold) ? prob : 0;
                if (prob > max)
                    max = prob;
            }
            probs[index][classes] = max;
        }
    }
    correct_region_boxes(rl, boxes);
}

static int nms_comparator(const void *pa, const void *pb)
{
    sortable_box_t a = *(sortable_box_t *)pa;
    sortable_box_t b = *(sortable_box_t *)pb;
    float diff = a.probs[a.index][b.class] - b.probs[b.index][b.class];

    if (diff < 0)
        return 1;
    else if (diff > 0)
        return -1;
    return 0;
}

static float overlap(float x1, float w1, float x2, float w2)
{
    float l1 = x1 - w1 / 2;
    float l2 = x2 - w2 / 2;
    float left = l1 > l2 ? l1 : l2;
    float r1 = x1 + w1 / 2;
    float r2 = x2 + w2 / 2;
    float right = r1 < r2 ? r1 : r2;

    return right - left;
}

static float box_intersection(box_t a, box_t b)
{
    float w = overlap(a.x, a.w, b.x, b.w);
    float h = overlap(a.y, a.h, b.y, b.h);

    if (w < 0 || h < 0)
        return 0;
    return w * h;
}

static float box_union(box_t a, box_t b)
{
    float i = box_intersection(a, b);
    float u = a.w * a.h + b.w * b.h - i;

    return u;
}

static float box_iou(box_t a, box_t b)
{
    return box_intersection(a, b) / box_union(a, b);
}

static void do_nms_sort(region_layer_t *rl, box_t *boxes, float **probs)
{
    uint32_t boxes_number = rl->boxes_number;
    uint32_t classes = rl->classes;
    float nms_value = rl->nms_value;
    int i, j, k;
    sortable_box_t s[boxes_number];

    for (i = 0; i < boxes_number; ++i)
    {
        s[i].index = i;
        s[i].class = 0;
        s[i].probs = probs;
    }

    for (k = 0; k < classes; ++k)
    {
        for (i = 0; i < boxes_number; ++i)
            s[i].class = k;
        qsort(s, boxes_number, sizeof(sortable_box_t), nms_comparator);
        for (i = 0; i < boxes_number; ++i)
        {
            if (probs[s[i].index][k] == 0)
                continue;
            box_t a = boxes[s[i].index];

            for (j = i + 1; j < boxes_number; ++j)
            {
                box_t b = boxes[s[j].index];

                if (box_iou(a, b) > nms_value)
                    probs[s[j].index][k] = 0;
            }
        }
    }
}

static int max_index(float *a, int n)
{
    int i, max_i = 0;
    float max = a[0];

    for (i = 1; i < n; ++i)
    {
        if (a[i] > max)
        {
            max = a[i];
            max_i = i;
        }
    }
    return max_i;
}

static void region_layer_output(region_layer_t *rl, obj_info_t *obj_info)
{
    uint32_t obj_number = 0;
    uint32_t image_width = rl->image_width;
    uint32_t image_height = rl->image_height;
    float threshold = rl->threshold;
    box_t *boxes = (box_t *)rl->boxes;

    for (int i = 0; i < rl->boxes_number; ++i)
    {
        int class = max_index(rl->probs[i], rl->classes);
        float prob = rl->probs[i][class];
		//mp_printf(&mp_plat_print, "%.3f, ", prob);
        if (prob > threshold)
        {
            box_t *b = boxes + i;
            obj_info->obj[obj_number].x1 = b->x * image_width - (b->w * image_width / 2);
            obj_info->obj[obj_number].y1 = b->y * image_height - (b->h * image_height / 2);
            obj_info->obj[obj_number].x2 = b->x * image_width + (b->w * image_width / 2);
            obj_info->obj[obj_number].y2 = b->y * image_height + (b->h * image_height / 2);
            obj_info->obj[obj_number].classid = class;
            obj_info->obj[obj_number].prob = prob;
            obj_number++;
        }
    }
    obj_info->obj_number = obj_number;
}

void region_layer_run(region_layer_t *rl, obj_info_t *obj_info)
{
	forward_region_layer(rl);
    get_region_boxes(rl, rl->output, rl->probs, rl->boxes);
    do_nms_sort(rl, rl->boxes, rl->probs);
    region_layer_output(rl, obj_info);
}

void region_layer_draw_boxes(region_layer_t *rl, callback_draw_box callback)
{
    uint32_t image_width = rl->image_width;
    uint32_t image_height = rl->image_height;
    float threshold = rl->threshold;
    box_t *boxes = (box_t *)rl->boxes;

    for (int i = 0; i < rl->boxes_number; ++i)
    {
        int class = max_index(rl->probs[i], rl->classes);
        float prob = rl->probs[i][class];

        if (prob > threshold)
        {
            box_t *b = boxes + i;
            uint32_t x1 = b->x * image_width - (b->w * image_width / 2);
            uint32_t y1 = b->y * image_height - (b->h * image_height / 2);
            uint32_t x2 = b->x * image_width + (b->w * image_width / 2);
            uint32_t y2 = b->y * image_height + (b->h * image_height / 2);
            callback(x1, y1, x2, y2, class, prob);
        }
    }
}

// static void drawboxes(uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2, uint32_t class, float prob)
// {
//     if (x1 >= 320)
//         x1 = 319;
//     if (x2 >= 320)
//         x2 = 319;
//     if (y1 >= 240)
//         y1 = 239;
//     if (y2 >= 240)
//         y2 = 239;

//     lcd_draw_rectangle(x1, y1, x2, y2, 2, RED);
// }

/* end of region_layer.c*/