@@ -2573,7 +2573,7 @@ static HI_S32 SVP_NNIE_Yolov2_GetResult(HI_S32 *ps32InputData,HI_U32 u32GridNumW
2573
2573
* Modification : Create
2574
2574
*
2575
2575
*****************************************************************************/
2576
- static HI_S32 SVP_NNIE_Yolov3_GetResult (HI_S32 * * pps32InputData ,HI_U32 au32GridNumWidth [],
2576
+ static HI_S32 SVP_NNIE_Yolov3_GetResult_bak (HI_S32 * * pps32InputData ,HI_U32 au32GridNumWidth [],
2577
2577
HI_U32 au32GridNumHeight [],HI_U32 au32Stride [],HI_U32 u32EachGridBbox ,HI_U32 u32ClassNum ,HI_U32 u32SrcWidth ,
2578
2578
HI_U32 u32SrcHeight ,HI_U32 u32MaxRoiNum ,HI_U32 u32NmsThresh ,HI_U32 u32ConfThresh ,
2579
2579
HI_FLOAT af32Bias [SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM ][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM ],
@@ -2733,6 +2733,208 @@ static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridN
2733
2733
return HI_SUCCESS ;
2734
2734
}
2735
2735
2736
+ static HI_S32 SVP_NNIE_Yolov3_GetResult (HI_S32 * * pps32InputData ,HI_U32 au32GridNumWidth [],
2737
+ HI_U32 au32GridNumHeight [],HI_U32 au32Stride [],HI_U32 u32EachGridBbox ,HI_U32 u32ClassNum ,HI_U32 u32SrcWidth ,
2738
+ HI_U32 u32SrcHeight ,HI_U32 u32MaxRoiNum ,HI_U32 u32NmsThresh ,HI_U32 u32ConfThresh ,
2739
+ HI_FLOAT af32Bias [SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM ][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM ],
2740
+ HI_S32 * ps32TmpBuf ,HI_S32 * ps32DstScore , HI_S32 * ps32DstRoi , HI_S32 * ps32ClassRoiNum )
2741
+ {
2742
+ // print debug time
2743
+ double post_start ;
2744
+ post_start = getTimeOfMSeconds ();
2745
+
2746
+
2747
+ HI_S32 * ps32InputBlob = NULL ;
2748
+ HI_FLOAT * pf32Permute = NULL ;
2749
+ SAMPLE_SVP_NNIE_YOLOV3_BBOX_S * pstBbox = NULL ;
2750
+ HI_S32 * ps32AssistBuf = NULL ;
2751
+ HI_U32 u32TotalBboxNum = 0 ;
2752
+ HI_U32 u32ChnOffset = 0 ;
2753
+ HI_U32 u32HeightOffset = 0 ;
2754
+ HI_U32 u32BboxNum = 0 ;
2755
+ HI_U32 u32GridXIdx ;
2756
+ HI_U32 u32GridYIdx ;
2757
+ HI_U32 u32Offset ;
2758
+ HI_FLOAT f32StartX ;
2759
+ HI_FLOAT f32StartY ;
2760
+ HI_FLOAT f32Width ;
2761
+ HI_FLOAT f32Height ;
2762
+ HI_FLOAT f32ObjScore ;
2763
+ HI_U32 u32MaxValueIndex = 0 ;
2764
+ HI_FLOAT f32MaxScore ;
2765
+ HI_S32 s32ClassScore ;
2766
+ HI_U32 u32ClassRoiNum ;
2767
+ HI_U32 i = 0 , j = 0 , k = 0 , c = 0 , h = 0 , w = 0 ;
2768
+ HI_U32 u32BlobSize = 0 ;
2769
+ HI_U32 u32MaxBlobSize = 0 ;
2770
+
2771
+
2772
+
2773
+ for (i = 0 ; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM ; i ++ )
2774
+ {
2775
+ //u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox;
2776
+ u32BlobSize = au32GridNumWidth [i ]* au32GridNumHeight [i ]* sizeof (HI_U32 )*
2777
+ (5 + u32ClassNum )* u32EachGridBbox ;
2778
+ //printf("\nu32BlobSize:%d u32BlobSize:%d\n",u32BlobSize,u32BlobSize);
2779
+ if (u32MaxBlobSize < u32BlobSize )
2780
+ {
2781
+ u32MaxBlobSize = u32BlobSize ;
2782
+ }
2783
+ }
2784
+
2785
+ for (i = 0 ; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM ; i ++ )
2786
+ {
2787
+ u32TotalBboxNum += au32GridNumWidth [i ]* au32GridNumHeight [i ]* u32EachGridBbox ;
2788
+ }
2789
+
2790
+ //get each tmpbuf addr
2791
+ pf32Permute = (HI_FLOAT * )ps32TmpBuf ;
2792
+ pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S * )(pf32Permute + u32MaxBlobSize /sizeof (HI_S32 ));
2793
+ ps32AssistBuf = (HI_S32 * )(pstBbox + u32TotalBboxNum );
2794
+
2795
+ printf ("post process time1: %f\n" , getTimeOfMSeconds () - post_start );
2796
+
2797
+ for (i = 0 ; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM ; i ++ )
2798
+ {
2799
+ //permute
2800
+ u32Offset = 0 ;
2801
+ ps32InputBlob = pps32InputData [i ];
2802
+ u32ChnOffset = au32GridNumHeight [i ]* au32Stride [i ]/sizeof (HI_S32 );
2803
+ u32HeightOffset = au32Stride [i ]/sizeof (HI_S32 );
2804
+ for (h = 0 ; h < au32GridNumHeight [i ]; h ++ )
2805
+ {
2806
+ for (w = 0 ; w < au32GridNumWidth [i ]; w ++ )
2807
+ {
2808
+ //for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++)
2809
+ for (c = 0 ; c < (5 + u32ClassNum )* u32EachGridBbox ; c ++ )
2810
+ {
2811
+ pf32Permute [u32Offset ++ ] = (HI_FLOAT )(ps32InputBlob [c * u32ChnOffset + h * u32HeightOffset + w ]) / SAMPLE_SVP_NNIE_QUANT_BASE ;
2812
+ }
2813
+ }
2814
+ }
2815
+
2816
+
2817
+ // // debug
2818
+ // printf("ID:%d, au32GridNumWidth:%d, au32GridNumHeight:%d\n",
2819
+ // i,au32GridNumWidth[i], au32GridNumHeight[i]
2820
+ // );
2821
+
2822
+
2823
+ //decode bbox and calculate score
2824
+ for (j = 0 ; j < au32GridNumWidth [i ]* au32GridNumHeight [i ]; j ++ )
2825
+ {
2826
+ u32GridXIdx = j % au32GridNumWidth [i ];
2827
+ u32GridYIdx = j / au32GridNumWidth [i ];
2828
+ for (k = 0 ; k < u32EachGridBbox ; k ++ )
2829
+ {
2830
+ u32MaxValueIndex = 0 ;
2831
+ // u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
2832
+ u32Offset = (j * u32EachGridBbox + k ) * (5 + u32ClassNum );
2833
+
2834
+ //calculate score
2835
+ f32ObjScore = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 4 ]);
2836
+
2837
+ //(void)SVP_NNIE_SoftMax(&pf32Permute[u32Offset + 5], u32ClassNum);
2838
+ for (HI_U32 idx = 0 ; idx < u32ClassNum ; idx ++ )
2839
+ {
2840
+ pf32Permute [u32Offset + 5 + idx ] = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 5 + idx ]);
2841
+ }
2842
+
2843
+ f32MaxScore = SVP_NNIE_Yolov2_GetMaxVal (& pf32Permute [u32Offset + 5 ], u32ClassNum , & u32MaxValueIndex );
2844
+ s32ClassScore = (HI_S32 )(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE );
2845
+
2846
+ //filter low score roi
2847
+ if (s32ClassScore > u32ConfThresh )
2848
+ {
2849
+ //decoded box
2850
+ #if 0 //yolov3, yolov4 use
2851
+ //decode bbox
2852
+ f32StartX = ((HI_FLOAT )u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 0 ])) / au32GridNumWidth [i ];
2853
+ f32StartY = ((HI_FLOAT )u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 1 ])) / au32GridNumHeight [i ];
2854
+ f32Width = (HI_FLOAT )(exp (pf32Permute [u32Offset + 2 ]) * af32Bias [i ][2 * k ]) / u32SrcWidth ;
2855
+ f32Height = (HI_FLOAT )(exp (pf32Permute [u32Offset + 3 ]) * af32Bias [i ][2 * k + 1 ]) / u32SrcHeight ;
2856
+ #else //yolov5 use
2857
+ f32StartX = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 0 ]);
2858
+ f32StartY = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 1 ]);
2859
+ f32Width = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 2 ]);
2860
+ f32Height = SAMPLE_SVP_NNIE_SIGMOID (pf32Permute [u32Offset + 3 ]);
2861
+ f32StartX = (f32StartX * 2 - 0.5 + (HI_FLOAT )u32GridXIdx )/au32GridNumWidth [i ];
2862
+ f32StartY = (f32StartY * 2 - 0.5 + (HI_FLOAT )u32GridYIdx )/au32GridNumHeight [i ];
2863
+ f32Width = (f32Width * 2. )* (f32Width * 2. )* af32Bias [i ][2 * k ]/u32SrcWidth ;
2864
+ f32Height = (f32Height * 2. )* (f32Height * 2. )* af32Bias [i ][2 * k + 1 ]/u32SrcHeight ;
2865
+ #endif
2866
+
2867
+
2868
+
2869
+ pstBbox [u32BboxNum ].f32Xmin = (HI_FLOAT )(f32StartX - f32Width * 0.5f );
2870
+ pstBbox [u32BboxNum ].f32Ymin = (HI_FLOAT )(f32StartY - f32Height * 0.5f );
2871
+ pstBbox [u32BboxNum ].f32Xmax = (HI_FLOAT )(f32StartX + f32Width * 0.5f );
2872
+ pstBbox [u32BboxNum ].f32Ymax = (HI_FLOAT )(f32StartY + f32Height * 0.5f );
2873
+ pstBbox [u32BboxNum ].s32ClsScore = s32ClassScore ;
2874
+ pstBbox [u32BboxNum ].u32Mask = 0 ;
2875
+ pstBbox [u32BboxNum ].u32ClassIdx = (HI_S32 )(u32MaxValueIndex + 1 );
2876
+ u32BboxNum ++ ;
2877
+
2878
+ // // debug
2879
+ // printf("xmin:%f, ymin:%f, xmax:%f, ymax:%f, score:%d\n",
2880
+ // (HI_FLOAT)(f32StartX - f32Width * 0.5f),
2881
+ // (HI_FLOAT)(f32StartY - f32Height * 0.5f),
2882
+ // (HI_FLOAT)(f32StartX + f32Width * 0.5f),
2883
+ // (HI_FLOAT)(f32StartY + f32Height * 0.5f),
2884
+ // s32ClassScore);
2885
+ }
2886
+ }
2887
+ }
2888
+ }
2889
+
2890
+ printf ("post process time2: %f\n" , getTimeOfMSeconds () - post_start );
2891
+
2892
+ //quick sort
2893
+ (void )SVP_NNIE_Yolo_NonRecursiveArgQuickSort ((HI_S32 * )pstBbox , 0 , u32BboxNum - 1 ,
2894
+ sizeof (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S )/sizeof (HI_U32 ),4 ,(SAMPLE_SVP_NNIE_STACK_S * )ps32AssistBuf );
2895
+
2896
+ printf ("post process time3: sort time: %f\n" , getTimeOfMSeconds () - post_start );
2897
+
2898
+ //Yolov3 and Yolov2 have the same Nms operation
2899
+ (void )SVP_NNIE_Yolov2_NonMaxSuppression (pstBbox , u32BboxNum , u32NmsThresh , sizeof (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S )/sizeof (HI_U32 ));
2900
+
2901
+ printf ("post process time4: nms time: %f\n" , getTimeOfMSeconds () - post_start );
2902
+
2903
+ //Get result
2904
+ printf ("u32ClassNum:%u\n" ,u32ClassNum );
2905
+ for (i = 1 ; i < u32ClassNum + 1 ; i ++ )
2906
+ {
2907
+ //printf("i:%u\n",i);
2908
+ u32ClassRoiNum = 0 ;
2909
+ for (j = 0 ; j < u32BboxNum ; j ++ )
2910
+ {
2911
+ if ((0 == pstBbox [j ].u32Mask ) && (i == pstBbox [j ].u32ClassIdx ) && (u32ClassRoiNum < u32MaxRoiNum ))
2912
+ {
2913
+ * (ps32DstRoi ++ ) = SAMPLE_SVP_NNIE_MAX ((HI_S32 )(pstBbox [j ].f32Xmin * u32SrcWidth ), 0 );
2914
+ * (ps32DstRoi ++ ) = SAMPLE_SVP_NNIE_MAX ((HI_S32 )(pstBbox [j ].f32Ymin * u32SrcHeight ), 0 );
2915
+ * (ps32DstRoi ++ ) = SAMPLE_SVP_NNIE_MIN ((HI_S32 )(pstBbox [j ].f32Xmax * u32SrcWidth ), u32SrcWidth );
2916
+ * (ps32DstRoi ++ ) = SAMPLE_SVP_NNIE_MIN ((HI_S32 )(pstBbox [j ].f32Ymax * u32SrcHeight ), u32SrcHeight );
2917
+ * (ps32DstScore ++ ) = pstBbox [j ].s32ClsScore ;
2918
+ u32ClassRoiNum ++ ;
2919
+
2920
+ // // debug
2921
+ // printf("xmin:%d, ymin:%d, xmax:%d, ymax:%d, score:%d, srcwidth:%d, srcheight:%d\n",
2922
+ // SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0),
2923
+ // SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0),
2924
+ // SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), (HI_S32)u32SrcWidth),
2925
+ // SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), (HI_S32)u32SrcHeight),
2926
+ // pstBbox[j].s32ClsScore,
2927
+ // u32SrcWidth,
2928
+ // u32SrcHeight);
2929
+ }
2930
+ }
2931
+ * (ps32ClassRoiNum + i ) = u32ClassRoiNum ;
2932
+ }
2933
+ printf ("post process time5: all post process time %f\n" , getTimeOfMSeconds () - post_start );
2934
+
2935
+ return HI_SUCCESS ;
2936
+ }
2937
+
2736
2938
2737
2939
/*****************************************************************************
2738
2940
* Prototype : SAMPLE_COMM_SVP_NNIE_CnnGetTopN
0 commit comments