# main.py

import os

from numpy.matrixlib import defmatrix
import tensorflow as tf
import cv2 
import numpy as np
import six
import pyautogui as pygui
import collections
import six
import PIL.Image as Image
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
import argparse


# Argument Parser
# Each row is (short flag, long flag, value type, default, help text); the
# rows are registered in the order listed, so --help prints them in this order.
_CLI_OPTIONS = (
    ('-v', '--visualize', str, 'y',
     '(y/n) Visualize the creation of boxes. Default = y'),
    ('-t', '--threshold', float, 0.34,
     'Classification confidence threshold (0 to 1) Default=0.34'),
    ('-a', '--annotations', str, 'annotations',
     'Specify different path for label map annotations'),
    ('-m', '--model', str, 'ssd_mobnet_320',
     'Specify different path for pretrained model'),
    ('-c', '--checkpoint', str, 'models/ssd_mobnet_320',
     'Specify different path for trained checkpoint files'),
    ('-p', '--pipeline', str, 'models/ssd_mobnet_320/pipeline.config',
     'Specify different path for pipeline.config file'),
)

parser = argparse.ArgumentParser(description='Configure runtime...')
for _short_flag, _long_flag, _value_type, _default, _help_text in _CLI_OPTIONS:
    parser.add_argument(_short_flag, _long_flag, type=_value_type,
                        default=_default, help=_help_text)

args = parser.parse_args()

# Runtime switches: drawing is enabled only when vis_bool equals 'y';
# conf_thresh is the minimum score a detection needs to be used.
vis_bool = args.visualize
conf_thresh = args.threshold


# Paths
(ANNOTATION_PATH,
 CUSTOM_MODEL_NAME,
 CHECKPOINT_PATH,
 CONFIG_PATH) = (args.annotations, args.model, args.checkpoint, args.pipeline)

# Colour names used for bounding boxes.  A class id selects its colour by
# index (taken modulo the list length), so the order of the entries matters
# and must not be changed.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
    'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
    'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 'DarkOrchid',
    'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue',
    'DodgerBlue', 'FireBrick', 'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro',
    'GhostWhite', 'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew',
    'HotPink', 'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush',
    'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow',
    'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen',
    'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple',
    'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise', 'MediumVioletRed', 'MintCream',
    'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab',
    'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise',
    'PaleVioletRed', 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum',
    'PowderBlue', 'Purple', 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown',
    'Green', 'SandyBrown', 'SeaGreen', 'SeaShell', 'Sienna', 'Silver',
    'SkyBlue', 'SlateBlue', 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen',
    'SteelBlue', 'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise',
    'Violet', 'Wheat', 'White', 'WhiteSmoke', 'Yellow', 'YellowGreen',
]


# Rewriting tensorflow function to implement custom functionality
def override_visualize_boxes_and_labels_on_image_array(
    image,
    boxes,
    classes,
    scores,
    vis_bool,
    category_index,
    use_normalized_coordinates=False,
    max_boxes_to_draw=20,
    min_score_thresh=.5,
    agnostic_mode=False,
    line_thickness=4):
    """Select detections above a score threshold and optionally draw them.

    Unlike the TensorFlow helper it is derived from, this function returns
    the last accepted detection so the caller can act on it.

    Args:
        image: Image array handed to
            ``viz_utils.draw_bounding_box_on_image_array``; only touched
            when ``vis_bool == 'y'``.
        boxes: Array whose first dimension indexes detections; each row
            unpacks to ``(ymin, xmin, ymax, xmax)``.
        classes: Per-detection class ids, used as keys into
            ``category_index`` and as indices into ``STANDARD_COLORS``.
        scores: Per-detection scores, or ``None`` to accept every box (no
            percentage is then added to the label).
        vis_bool: Boxes are drawn only when this equals the string ``'y'``.
        category_index: Mapping from class id to a dict with a ``'name'``
            entry.
        use_normalized_coordinates: Forwarded unchanged to the drawing helper.
        max_boxes_to_draw: Maximum number of distinct boxes to accept; a
            falsy value means "as many as there are rows in ``boxes``".
        min_score_thresh: A detection is accepted only if its score is
            strictly greater than this value.
        agnostic_mode: If true, class names are ignored and every box is
            coloured ``'DarkOrange'``.
        line_thickness: Forwarded to the drawing helper as ``thickness``.

    Returns:
        ``(box, class_name)`` for the last accepted detection. ``box`` is
        the empty tuple when nothing was accepted; ``class_name`` is ``''``
        in that case and in agnostic mode.
    """
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = {}

    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]

    box = ()
    class_name = ''

    for i in range(boxes.shape[0]):
        if len(box_to_color_map) >= max_boxes_to_draw:
            break
        # `not (a > b)` rather than `a <= b` so that NaN scores are skipped,
        # exactly as a plain `score > threshold` acceptance test would do.
        if scores is not None and not (scores[i] > min_score_thresh):
            continue

        box = tuple(boxes[i].tolist())

        label_parts = []
        if not agnostic_mode:
            if classes[i] in category_index:
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            label_parts.append(str(class_name))
        if scores is not None:
            # Only format a percentage when scores were actually supplied.
            label_parts.append('{}%'.format(round(100 * scores[i])))
        if label_parts:
            box_to_display_str_map[box].append(': '.join(label_parts))

        if agnostic_mode:
            box_to_color_map[box] = 'DarkOrange'
        else:
            box_to_color_map[box] = STANDARD_COLORS[classes[i] % len(STANDARD_COLORS)]

    if vis_bool == 'y':
        # A separate loop variable keeps `box` (the return value) pointing at
        # the last accepted detection instead of the last dictionary key.
        for drawn_box, color in box_to_color_map.items():
            ymin, xmin, ymax, xmax = drawn_box
            viz_utils.draw_bounding_box_on_image_array(
                image,
                ymin,
                xmin,
                ymax,
                xmax,
                color=color,
                thickness=line_thickness,
                display_str_list=box_to_display_str_map[drawn_box],
                use_normalized_coordinates=use_normalized_coordinates)

    return box, class_name


# Read the pipeline configuration and build the detection model from its
# 'model' section, with training disabled.
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(
    is_training=False,
    model_config=configs['model'],
)

# Restore the model from the 'ckpt-21' checkpoint under CHECKPOINT_PATH.
# expect_partial() is called on the restore result because only the model is
# being loaded from the checkpoint.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
_checkpoint_prefix = os.path.join(CHECKPOINT_PATH, 'ckpt-21')
ckpt.restore(_checkpoint_prefix).expect_partial()

@tf.function
def detect_fn(image):
    """Run the detection model on ``image`` and return its detections.

    The input is passed through the model's ``preprocess``, ``predict`` and
    ``postprocess`` stages in that order; the post-processed output is
    returned unchanged.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

# Real Time Detection
category_index = label_map_util.create_category_index_from_labelmap(
    os.path.join(ANNOTATION_PATH, 'label_map.pbtxt'))

# Setup capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Added to the model's class ids before they are looked up in category_index.
label_id_offset = 1

# try/finally guarantees the camera is released however the loop ends
# (pressing 'q', a failed frame grab, Ctrl-C or an unexpected error).
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; stop instead of feeding an empty
            # frame into the model.
            print('Could not read a frame from the camera; stopping.')
            break

        image_np = np.array(frame)
        input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
        detections = detect_fn(input_tensor)

        # Drop the batch dimension and keep only the valid detections.
        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        image_np_with_detections = image_np.copy()
        det_classes_with_offset = detections['detection_classes'] + label_id_offset
        im_width, im_height = pygui.size()
        box, class_name = override_visualize_boxes_and_labels_on_image_array(
            image_np_with_detections,
            detections['detection_boxes'],
            det_classes_with_offset,
            detections['detection_scores'],
            vis_bool,
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=1,
            min_score_thresh=conf_thresh,
            agnostic_mode=False,
        )

        if box:
            ymin, xmin, ymax, xmax = box
            print(ymin, xmin, ymax, xmax)
            # Scale the box coordinates by the screen size reported by pyautogui.
            left, right, top, bottom = (xmin * im_width, xmax * im_width,
                                        ymin * im_height, ymax * im_height)

            if class_name == 'single':
                pygui.click(button='left')
            elif class_name == 'double':
                # NOTE(review): the 'double' label triggers a right click --
                # confirm this mapping is intended.
                pygui.click(button='right')
            elif class_name == 'pinch':
                # Move the pointer to the box's top-right corner.
                pygui.moveTo(round(right), round(top))

        if vis_bool == 'y':
            cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

            # 'q' quits; the key is only polled while the preview is shown.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    if vis_bool == 'y':
        # Only touch the GUI when a preview window may have been opened.
        cv2.destroyAllWindows()