from mvnc import mvncapi as mvnc
import sys
import numpy as np
import cv2
import time
from picamera.array import PiRGBArray
from picamera import PiCamera
import os
import random
import math

# Assume running in examples/caffe/TinyYolo and the graph file is in the current directory.
#input_image_file = '../../data/images/nps_chair.png'
#input_image_file = './dog.jpg'
input_image_file = 'car.jpg'
#input_image_file = 'traffic.jpg'
tiny_yolo_graph_file = './graph'

# Tiny Yolo assumes input images have these dimensions.
NETWORK_IMAGE_WIDTH = 448
NETWORK_IMAGE_HEIGHT = 448

# Interpret the output from a single inference of Tiny Yolo (GetResult)
# and filter out objects/boxes with low probabilities.
# inference_result is the array of floats returned from the API GetResult,
# converted to float32 format.
# input_image_width and input_image_height are the dimensions of the input image.
# Returns a list of lists. Each inner list represents one found object and
# contains the following 6 values:
#    string that is the network classification, e.g. 'car' or 'person'
#    float value for box center X pixel location within the source image
#    float value for box center Y pixel location within the source image
#    float value for box width in pixels within the source image
#    float value for box height in pixels within the source image
#    float value that is the probability of the network classification
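# For example, a single detected car might come back as (values are
# illustrative only):
#    ['car', 224.0, 190.5, 120.3, 80.7, 0.64]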
def filter_objects(inference_result, input_image_width, input_image_height):
    # the raw number of floats returned from the inference (GetResult())
    num_inference_results = len(inference_result)

    # Tiny Yolo's 20 VOC classes, with every class this project does not care
    # about collapsed into "miscellaneous"
    network_classifications = ["miscellaneous", "bicycle", "miscellaneous", "miscellaneous",
                               "miscellaneous", "bus", "car", "miscellaneous", "miscellaneous",
                               "miscellaneous", "miscellaneous", "miscellaneous", "miscellaneous",
                               "motorbike", "person", "miscellaneous", "miscellaneous",
                               "miscellaneous", "train", "miscellaneous"]

    # only keep boxes with probabilities greater than this
    probability_threshold = 0.09

    num_classifications = len(network_classifications)  # should be 20
    grid_size = 7  # the image is a 7x7 grid; each grid cell is 64x64 pixels
    boxes_per_grid_cell = 2  # the number of boxes returned for each grid cell
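
    # The 1470 floats from the network decompose as:
    #   7*7*20  = 980 class probabilities (one set of 20 per grid cell)
    #   7*7*2   =  98 box confidence scale factors (one per box)
    #   7*7*2*4 = 392 box coordinates (x, y, w, h per box)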

    all_probabilities = np.zeros((grid_size, grid_size, boxes_per_grid_cell, num_classifications))

    # classification_probabilities contains a probability for each classification for
    # each 64x64 pixel square of the grid. The source image contains
    # 7x7 of these 64x64 pixel squares and there are 20 possible classifications.
    classification_probabilities = \
        np.reshape(inference_result[0:980], (grid_size, grid_size, num_classifications))

    # the probability scale factor for each box
    box_prob_scale_factor = np.reshape(inference_result[980:1078],
                                       (grid_size, grid_size, boxes_per_grid_cell))

    # get the boxes from the results and adjust them to pixel units
    all_boxes = np.reshape(inference_result[1078:], (grid_size, grid_size, boxes_per_grid_cell, 4))
    boxes_to_pixel_units(all_boxes, input_image_width, input_image_height, grid_size)

    # adjust the probabilities with the scaling factor
    for box_index in range(boxes_per_grid_cell):  # loop over boxes
        for class_index in range(num_classifications):  # loop over classifications
            all_probabilities[:, :, box_index, class_index] = \
                np.multiply(classification_probabilities[:, :, class_index],
                            box_prob_scale_factor[:, :, box_index])

    # mask out everything below the probability threshold
    probability_threshold_mask = np.array(all_probabilities >= probability_threshold, dtype='bool')
    box_threshold_mask = np.nonzero(probability_threshold_mask)
    boxes_above_threshold = all_boxes[box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
    classifications_for_boxes_above = np.argmax(all_probabilities, axis=3)[
        box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
    probabilities_above_threshold = all_probabilities[probability_threshold_mask]

    # sort the boxes from highest probability to lowest and then
    # sort the probabilities and classifications to match
    argsort = np.array(np.argsort(probabilities_above_threshold))[::-1]
    boxes_above_threshold = boxes_above_threshold[argsort]
    classifications_for_boxes_above = classifications_for_boxes_above[argsort]
    probabilities_above_threshold = probabilities_above_threshold[argsort]

    # get a mask for boxes that seem to be the same object
    duplicate_box_mask = get_duplicate_box_mask(boxes_above_threshold)

    # update the boxes, probabilities and classifications, removing duplicates
    boxes_above_threshold = boxes_above_threshold[duplicate_box_mask]
    classifications_for_boxes_above = classifications_for_boxes_above[duplicate_box_mask]
    probabilities_above_threshold = probabilities_above_threshold[duplicate_box_mask]

    classes_boxes_and_probs = []
    for i in range(len(boxes_above_threshold)):
        classes_boxes_and_probs.append([network_classifications[classifications_for_boxes_above[i]],
                                        boxes_above_threshold[i][0], boxes_above_threshold[i][1],
                                        boxes_above_threshold[i][2], boxes_above_threshold[i][3],
                                        probabilities_above_threshold[i]])
    return classes_boxes_and_probs

# Creates a mask to remove duplicate objects (boxes), and their related
# probabilities and classifications, that should be considered the same object.
# This is determined by how similar the boxes are, based on the
# intersection-over-union metric.
# box_list is a list of boxes (4 floats each: center X, center Y, width, height)
def get_duplicate_box_mask(box_list):
    # The intersection-over-union threshold to use when determining duplicates.
    # Objects/boxes found that are over this threshold will be
    # considered the same object.
    max_iou = 0.25

    box_mask = np.ones(len(box_list))
    for i in range(len(box_list)):
        if box_mask[i] == 0:
            continue
        for j in range(i + 1, len(box_list)):
            if get_intersection_over_union(box_list[i], box_list[j]) > max_iou:
                box_mask[j] = 0.0

    filter_iou_mask = np.array(box_mask > 0.0, dtype='bool')
    return filter_iou_mask

# Converts the boxes in box_list to pixel units.
# Assumes box_list is the box output from the tiny yolo network
# and is [grid_size x grid_size x 2 x 4].
def boxes_to_pixel_units(box_list, image_width, image_height, grid_size):
    # number of boxes per grid cell
    boxes_per_cell = 2

    # set up some offset values to map boxes to pixels
    # box_offset will be [[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6]] ...repeated for 7]
    box_offset = np.transpose(np.reshape(np.array([np.arange(grid_size)] * (grid_size * 2)),
                                         (boxes_per_cell, grid_size, grid_size)), (1, 2, 0))

    # adjust the box centers from cell-relative to image-relative (0.0-1.0) units
    box_list[:, :, :, 0] += box_offset
    box_list[:, :, :, 1] += np.transpose(box_offset, (1, 0, 2))
    box_list[:, :, :, 0:2] = box_list[:, :, :, 0:2] / (grid_size * 1.0)
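    # e.g. a raw center X of 0.5 in grid column 3 becomes (0.5 + 3) / 7 of the
    # image width, which is 224 px for a 448-pixel-wide image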

    # the network outputs the square roots of the box widths and heights,
    # so square them to recover the actual (0.0-1.0) sizes
    box_list[:, :, :, 2] = np.multiply(box_list[:, :, :, 2], box_list[:, :, :, 2])
    box_list[:, :, :, 3] = np.multiply(box_list[:, :, :, 3], box_list[:, :, :, 3])

    # scale the boxes to the image size in pixels
    box_list[:, :, :, 0] *= image_width
    box_list[:, :, :, 1] *= image_height
    box_list[:, :, :, 2] *= image_width
    box_list[:, :, :, 3] *= image_height

# Evaluate the intersection-over-union for two boxes.
# The intersection-over-union metric determines how close
# two boxes are to being the same box. The closer the boxes
# are to being the same, the closer the metric will be to 1.0.
# box_1 and box_2 are arrays of 4 numbers: the (x, y) point that defines
# the center of the box, followed by the width and height of the box.
# Returns the intersection-over-union (between 0.0 and 1.0)
# for the two boxes specified.
def get_intersection_over_union(box_1, box_2):
    # one dimension of the intersecting box
    intersection_dim_1 = min(box_1[0] + 0.5 * box_1[2], box_2[0] + 0.5 * box_2[2]) - \
                         max(box_1[0] - 0.5 * box_1[2], box_2[0] - 0.5 * box_2[2])

    # the other dimension of the intersecting box
    intersection_dim_2 = min(box_1[1] + 0.5 * box_1[3], box_2[1] + 0.5 * box_2[3]) - \
                         max(box_1[1] - 0.5 * box_1[3], box_2[1] - 0.5 * box_2[3])

    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
        # no intersection area
        intersection_area = 0
    else:
        # intersection area is the product of the intersection dimensions
        intersection_area = intersection_dim_1 * intersection_dim_2

    # The union area is the area of each box added together, minus the
    # intersection area, since the intersection is counted twice
    # (by definition it is in each box).
    union_area = box_1[2] * box_1[3] + box_2[2] * box_2[3] - intersection_area

    # now we can return the intersection over union
    iou = intersection_area / union_area
    return iou
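
# Worked example with made-up boxes: box_1 = (0, 0, 2, 2) and box_2 = (1, 0, 2, 2)
# overlap in a 1x2 strip, so IoU = 2 / (4 + 4 - 2) = 1/3, about 0.33, which is
# above the 0.25 duplicate threshold, so the lower-probability box would be dropped.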

# Displays the source image in a GUI window with boxes, labels and the current
# danger level drawn on it. Returns the last key pressed, the per-class object
# counts, and the numeric danger level.
def display_objects_in_gui(source_image, filtered_objects, play):  # play is currently unused
    # copy the image so we can draw on it. Could draw directly on the source
    # image if not concerned about preserving it.
    display_image = source_image.copy()
    source_image_width = source_image.shape[1]
    source_image_height = source_image.shape[0]

    x_ratio = float(source_image_width) / NETWORK_IMAGE_WIDTH
    y_ratio = float(source_image_height) / NETWORK_IMAGE_HEIGHT

    possible_objects = ["bicycle", "bus", "car", "motorbike", "person", "train", "miscellaneous"]
    num_objects = [0, 0, 0, 0, 0, 0, 0]

    # loop through each box and draw it on the image along with a classification label
    print('Found this many objects in the image: ' + str(len(filtered_objects)))
    for obj_index in range(len(filtered_objects)):
        center_x = int(filtered_objects[obj_index][1] * x_ratio)
        center_y = int(filtered_objects[obj_index][2] * y_ratio)
        half_width = int(filtered_objects[obj_index][3] * x_ratio) // 2
        half_height = int(filtered_objects[obj_index][4] * y_ratio) // 2

        # calculate box (left, top) and (right, bottom) coordinates
        box_left = max(center_x - half_width, 0)
        box_top = max(center_y - half_height, 0)
        box_right = min(center_x + half_width, source_image_width)
        box_bottom = min(center_y + half_height, source_image_height)

        print('box at index ' + str(obj_index) + ' is... left: ' + str(box_left) +
              ', top: ' + str(box_top) + ', right: ' + str(box_right) +
              ', bottom: ' + str(box_bottom))

        # draw the rectangle on the image; this is hopefully around the object
        box_color = (0, 255, 0)  # green box
        box_thickness = 2
        cv2.rectangle(display_image, (box_left, box_top), (box_right, box_bottom),
                      box_color, box_thickness)

        # draw the classification label string just above and to the left of the rectangle
        label_background_color = (70, 120, 70)  # greyish green background for text
        label_text_color = (255, 255, 255)  # white text
        cv2.rectangle(display_image, (box_left, box_top - 20), (box_right, box_top),
                      label_background_color, -1)
        cv2.putText(display_image,
                    filtered_objects[obj_index][0] + ' : %.2f' % filtered_objects[obj_index][5],
                    (box_left + 5, box_top - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)
        num_objects[possible_objects.index(filtered_objects[obj_index][0])] += 1

    # overlay the current danger level in the top-right corner
    dl_text, dl_value = calcDangerLevel(filtered_objects)
    cv2.rectangle(display_image, (source_image_width - 250, 0), (source_image_width - 1, 40),
                  dangerLevel(dl_text), -1)
    cv2.putText(display_image, 'Danger Level: ' + dl_text, (source_image_width - 230, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    for i in range(len(possible_objects)):
        print("Found:", num_objects[i], "examples of a", possible_objects[i])

    window_name = 'TinyYolo'
    cv2.imshow(window_name, display_image)
    raw_key = cv2.waitKey(1) & 0xFF

    # press 's' to save the annotated frame to disk
    if raw_key == ord('s'):
        filename = 'yolo' + str(random.randint(0, 20000)) + '.jpg'
        cv2.imwrite(os.path.join('imgs4-29_2/', filename), display_image)
    return raw_key, num_objects, dl_value

# Map a danger level string to the BGR color used for its overlay background.
def dangerLevel(level):
    switch = {
        'Low': (0, 255, 0),
        'Medium-Low': (0, 255, 255),
        'Medium': (0, 165, 255),
        'Medium-High': (0, 95, 255),
        'High': (0, 0, 255)
    }
    return switch.get(level)
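
# Note: OpenCV colors are BGR tuples, so the mapping above runs from green
# (low danger) through yellow and orange up to red (high danger).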

# Compute a rough danger level from the detected objects. Each object class
# contributes a fixed weight, and the total is scaled up when several distinct
# classes are present at once. Returns a (text, value) pair.
def calcDangerLevel(filtered_objects):
    value = 0
    num_obj = 0
    possible_objects = {"bicycle": False, "bus": False, "car": False,
                        "motorbike": False, "person": False, "train": False}
    # per-class danger weights
    switcher = {
        'bicycle': .6,
        'bus': .7,
        'car': .5,
        'motorbike': .6,
        'person': .4,
        'train': .7,
        'miscellaneous': .1
    }
    for obj_index in range(len(filtered_objects)):
        item = filtered_objects[obj_index][0]
        value = value + switcher.get(item)
        if item in possible_objects:
            # only the six tracked classes count toward the distinct-class scaling
            possible_objects[item] = True
        num_obj = num_obj + 1

    if num_obj == 0:
        value = 0
    else:
        # boost the total when several distinct classes are present
        scaling_factor = sum(list(possible_objects.values())) ** 1.4
        value = (scaling_factor * value) / 1.4
    print("Danger Value: ", value)

    # clamp, then bucket the value into one of five levels
    value = min(4.999, value)
    value_list = ['Low', 'Medium-Low', 'Medium', 'Medium-High', 'High']
    value = int(math.floor(value))
    text = value_list[value]
    return text, value
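
# Worked example: one car and one person give value = 0.5 + 0.4 = 0.9 with
# two distinct classes, so value = (2 ** 1.4 * 0.9) / 1.4, about 1.70, which
# floors to 1 and reads out as 'Medium-Low'.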

# This function is called from the entry point to do all the work.
def main():
    # set the logging level and initialize/open the first NCS we find
    mvnc.SetGlobalOption(mvnc.GlobalOption.LOG_LEVEL, 0)
    devices = mvnc.EnumerateDevices()
    if len(devices) == 0:
        print('No devices found')
        return 1
    device = mvnc.Device(devices[0])
    device.OpenDevice()

    # load the graph from disk and allocate it on the device via the API
    with open(tiny_yolo_graph_file, mode='rb') as f:
        graph_from_disk = f.read()
    graph = device.AllocateGraph(graph_from_disk)

    # Capture frames from the Pi camera, resize each to the network width and
    # height, keep a copy in display_image for display, then convert to float32,
    # normalize (divide by 255), and finally convert to float16 to pass to
    # LoadTensor as input for an inference.
    camera = PiCamera()
    rawCapture = PiRGBArray(camera)
    time.sleep(0.1)  # give the camera a moment to warm up

    play = True
    possible_objects = ["bicycle", "bus", "car", "motorbike", "person", "train", "miscellaneous"]
    count = 0
    total_objects = [0, 0, 0, 0, 0, 0, 0]
    dl_total = 0

    for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
        input_image = rawCapture.array
        if play:
            display_image = input_image
            input_image = cv2.resize(input_image, (NETWORK_IMAGE_WIDTH, NETWORK_IMAGE_HEIGHT),
                                     interpolation=cv2.INTER_LINEAR)
            input_image = input_image.astype(np.float32)
            input_image = np.divide(input_image, 255.0)
            input_image = input_image[:, :, ::-1]  # convert BGR to RGB

            t0 = time.time()
            # load the tensor and get the result; this executes the inference on the NCS
            graph.LoadTensor(input_image.astype(np.float16), 'user object')
            output, userobj = graph.GetResult()

            # filter out all the objects/boxes that don't meet thresholds; boxes come
            # back in network (448x448) pixel units and display_objects_in_gui rescales them
            filtered_objs = filter_objects(output.astype(np.float32),
                                           input_image.shape[1], input_image.shape[0])  # fc27 instead of fc12 for yolo_small
            print("Computation time:", time.time() - t0)

            raw_key, num_objects, dl_value = display_objects_in_gui(display_image, filtered_objs, True)
        else:
            # still poll the keyboard while paused so 'p' and 'q' keep working
            raw_key = cv2.waitKey(1) & 0xFF
        rawCapture.truncate(0)

        if raw_key == ord("p"):
            play = not play
        elif raw_key == ord("q"):
            # print the per-class averages and average danger level, then quit
            if count > 0:
                avg_objects = [n / count for n in total_objects]
                value_list = ['Low', 'Medium-Low', 'Medium', 'Medium-High', 'High']
                for i in range(len(avg_objects)):
                    print("Found an average of:", avg_objects[i], "examples of a", possible_objects[i])
                av_dl = int(math.floor(dl_total / count))
                print("Average danger level: " + value_list[av_dl])
            break
        elif play:
            # accumulate running totals for the end-of-session averages
            total_objects = [tot + new for tot, new in zip(total_objects, num_objects)]
            dl_total += dl_value
            count += 1

    # clean up
    graph.DeallocateGraph()
    device.CloseDevice()
    print('Finished')

# main entry point for the program; we'll call main() to do what needs to be done
if __name__ == "__main__":
    sys.exit(main())