Skip to content

Commit 0630e0b

Browse files
author
JimmyLauren
authored
Update sample_svp_nnie_software.c
fix conf calc bug
1 parent 9eac782 commit 0630e0b

File tree

1 file changed

+203
-1
lines changed

1 file changed

+203
-1
lines changed

04.multi-core-sample/nnie/sample_nnie_software/sample_svp_nnie_software.c

Lines changed: 203 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2573,7 +2573,7 @@ static HI_S32 SVP_NNIE_Yolov2_GetResult(HI_S32 *ps32InputData,HI_U32 u32GridNumW
25732573
* Modification : Create
25742574
*
25752575
*****************************************************************************/
2576-
static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],
2576+
static HI_S32 SVP_NNIE_Yolov3_GetResult_bak(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],
25772577
HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,
25782578
HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,
25792579
HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
@@ -2733,6 +2733,208 @@ static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridN
27332733
return HI_SUCCESS;
27342734
}
27352735

2736+
static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],
2737+
HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,
2738+
HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,
2739+
HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
2740+
HI_S32* ps32TmpBuf,HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
2741+
{
2742+
// print debug time
2743+
double post_start;
2744+
post_start = getTimeOfMSeconds();
2745+
2746+
2747+
HI_S32 *ps32InputBlob = NULL;
2748+
HI_FLOAT *pf32Permute = NULL;
2749+
SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
2750+
HI_S32 *ps32AssistBuf = NULL;
2751+
HI_U32 u32TotalBboxNum = 0;
2752+
HI_U32 u32ChnOffset = 0;
2753+
HI_U32 u32HeightOffset = 0;
2754+
HI_U32 u32BboxNum = 0;
2755+
HI_U32 u32GridXIdx;
2756+
HI_U32 u32GridYIdx;
2757+
HI_U32 u32Offset;
2758+
HI_FLOAT f32StartX;
2759+
HI_FLOAT f32StartY;
2760+
HI_FLOAT f32Width;
2761+
HI_FLOAT f32Height;
2762+
HI_FLOAT f32ObjScore;
2763+
HI_U32 u32MaxValueIndex = 0;
2764+
HI_FLOAT f32MaxScore;
2765+
HI_S32 s32ClassScore;
2766+
HI_U32 u32ClassRoiNum;
2767+
HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
2768+
HI_U32 u32BlobSize = 0;
2769+
HI_U32 u32MaxBlobSize = 0;
2770+
2771+
2772+
2773+
for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
2774+
{
2775+
//u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox;
2776+
u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*
2777+
(5+u32ClassNum)*u32EachGridBbox;
2778+
//printf("\nu32BlobSize:%d u32BlobSize:%d\n",u32BlobSize,u32BlobSize);
2779+
if(u32MaxBlobSize < u32BlobSize)
2780+
{
2781+
u32MaxBlobSize = u32BlobSize;
2782+
}
2783+
}
2784+
2785+
for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
2786+
{
2787+
u32TotalBboxNum += au32GridNumWidth[i]*au32GridNumHeight[i]*u32EachGridBbox;
2788+
}
2789+
2790+
//get each tmpbuf addr
2791+
pf32Permute = (HI_FLOAT*)ps32TmpBuf;
2792+
pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S*)(pf32Permute+u32MaxBlobSize/sizeof(HI_S32));
2793+
ps32AssistBuf = (HI_S32*)(pstBbox+u32TotalBboxNum);
2794+
2795+
printf("post process time1: %f\n", getTimeOfMSeconds() - post_start);
2796+
2797+
for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
2798+
{
2799+
//permute
2800+
u32Offset = 0;
2801+
ps32InputBlob = pps32InputData[i];
2802+
u32ChnOffset = au32GridNumHeight[i]*au32Stride[i]/sizeof(HI_S32);
2803+
u32HeightOffset = au32Stride[i]/sizeof(HI_S32);
2804+
for (h = 0; h < au32GridNumHeight[i]; h++)
2805+
{
2806+
for (w = 0; w < au32GridNumWidth[i]; w++)
2807+
{
2808+
//for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++)
2809+
for (c = 0; c < (5+u32ClassNum)*u32EachGridBbox; c++)
2810+
{
2811+
pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c*u32ChnOffset+h*u32HeightOffset+w]) / SAMPLE_SVP_NNIE_QUANT_BASE;
2812+
}
2813+
}
2814+
}
2815+
2816+
2817+
// // debug
2818+
// printf("ID:%d, au32GridNumWidth:%d, au32GridNumHeight:%d\n",
2819+
// i,au32GridNumWidth[i], au32GridNumHeight[i]
2820+
// );
2821+
2822+
2823+
//decode bbox and calculate score
2824+
for(j = 0; j < au32GridNumWidth[i]*au32GridNumHeight[i]; j++)
2825+
{
2826+
u32GridXIdx = j % au32GridNumWidth[i];
2827+
u32GridYIdx = j / au32GridNumWidth[i];
2828+
for (k = 0; k < u32EachGridBbox; k++)
2829+
{
2830+
u32MaxValueIndex = 0;
2831+
// u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
2832+
u32Offset = (j * u32EachGridBbox + k) * (5+u32ClassNum);
2833+
2834+
//calculate score
2835+
f32ObjScore = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 4]);
2836+
2837+
//(void)SVP_NNIE_SoftMax(&pf32Permute[u32Offset + 5], u32ClassNum);
2838+
for(HI_U32 idx = 0; idx < u32ClassNum; idx++)
2839+
{
2840+
pf32Permute[u32Offset + 5 + idx] = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 5 + idx]);
2841+
}
2842+
2843+
f32MaxScore = SVP_NNIE_Yolov2_GetMaxVal(&pf32Permute[u32Offset + 5], u32ClassNum, &u32MaxValueIndex);
2844+
s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore*SAMPLE_SVP_NNIE_QUANT_BASE);
2845+
2846+
//filter low score roi
2847+
if (s32ClassScore > u32ConfThresh)
2848+
{
2849+
//decoded box
2850+
#if 0 //yolov3, yolov4 use
2851+
//decode bbox
2852+
f32StartX = ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
2853+
f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) / au32GridNumHeight[i];
2854+
f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + 2]) * af32Bias[i][2*k]) / u32SrcWidth;
2855+
f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + 3]) * af32Bias[i][2*k + 1]) / u32SrcHeight;
2856+
#else //yolov5 use
2857+
f32StartX = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset+0]);
2858+
f32StartY = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset+1]);
2859+
f32Width = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset+2]);
2860+
f32Height = SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset+3]);
2861+
f32StartX = (f32StartX*2-0.5+(HI_FLOAT)u32GridXIdx)/au32GridNumWidth[i];
2862+
f32StartY = (f32StartY*2-0.5+(HI_FLOAT)u32GridYIdx)/au32GridNumHeight[i];
2863+
f32Width = (f32Width*2.)*(f32Width*2.)*af32Bias[i][2*k]/u32SrcWidth;
2864+
f32Height = (f32Height*2.)*(f32Height*2.)*af32Bias[i][2*k+1]/u32SrcHeight;
2865+
#endif
2866+
2867+
2868+
2869+
pstBbox[u32BboxNum].f32Xmin= (HI_FLOAT)(f32StartX - f32Width * 0.5f);
2870+
pstBbox[u32BboxNum].f32Ymin= (HI_FLOAT)(f32StartY - f32Height * 0.5f);
2871+
pstBbox[u32BboxNum].f32Xmax= (HI_FLOAT)(f32StartX + f32Width * 0.5f);
2872+
pstBbox[u32BboxNum].f32Ymax= (HI_FLOAT)(f32StartY + f32Height * 0.5f);
2873+
pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
2874+
pstBbox[u32BboxNum].u32Mask= 0;
2875+
pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex+1);
2876+
u32BboxNum++;
2877+
2878+
// // debug
2879+
// printf("xmin:%f, ymin:%f, xmax:%f, ymax:%f, score:%d\n",
2880+
// (HI_FLOAT)(f32StartX - f32Width * 0.5f),
2881+
// (HI_FLOAT)(f32StartY - f32Height * 0.5f),
2882+
// (HI_FLOAT)(f32StartX + f32Width * 0.5f),
2883+
// (HI_FLOAT)(f32StartY + f32Height * 0.5f),
2884+
// s32ClassScore);
2885+
}
2886+
}
2887+
}
2888+
}
2889+
2890+
printf("post process time2: %f\n", getTimeOfMSeconds() - post_start);
2891+
2892+
//quick sort
2893+
(void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBbox, 0, u32BboxNum - 1,
2894+
sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32),4,(SAMPLE_SVP_NNIE_STACK_S*)ps32AssistBuf);
2895+
2896+
printf("post process time3: sort time: %f\n", getTimeOfMSeconds() - post_start);
2897+
2898+
//Yolov3 and Yolov2 have the same Nms operation
2899+
(void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32));
2900+
2901+
printf("post process time4: nms time: %f\n", getTimeOfMSeconds() - post_start);
2902+
2903+
//Get result
2904+
printf("u32ClassNum:%u\n",u32ClassNum);
2905+
for (i = 1; i < u32ClassNum+1; i++)
2906+
{
2907+
//printf("i:%u\n",i);
2908+
u32ClassRoiNum = 0;
2909+
for(j = 0; j < u32BboxNum; j++)
2910+
{
2911+
if ((0 == pstBbox[j].u32Mask) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum))
2912+
{
2913+
*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0);
2914+
*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0);
2915+
*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), u32SrcWidth);
2916+
*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), u32SrcHeight);
2917+
*(ps32DstScore++) = pstBbox[j].s32ClsScore;
2918+
u32ClassRoiNum++;
2919+
2920+
// // debug
2921+
// printf("xmin:%d, ymin:%d, xmax:%d, ymax:%d, score:%d, srcwidth:%d, srcheight:%d\n",
2922+
// SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0),
2923+
// SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0),
2924+
// SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), (HI_S32)u32SrcWidth),
2925+
// SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), (HI_S32)u32SrcHeight),
2926+
// pstBbox[j].s32ClsScore,
2927+
// u32SrcWidth,
2928+
// u32SrcHeight);
2929+
}
2930+
}
2931+
*(ps32ClassRoiNum+i) = u32ClassRoiNum;
2932+
}
2933+
printf("post process time5: all post process time %f\n", getTimeOfMSeconds() - post_start);
2934+
2935+
return HI_SUCCESS;
2936+
}
2937+
27362938

27372939
/*****************************************************************************
27382940
* Prototype : SAMPLE_COMM_SVP_NNIE_CnnGetTopN

0 commit comments

Comments
 (0)