# camera.py

# USAGE
# python camera.py
# (reads frames from the default webcam; the model and label binarizer are
# loaded from the hard-coded paths model/activity.model and model/lb.pickle)

# import the necessary packages
from keras.models import load_model
from collections import deque
from PIL import Image
import imutils
import numpy as np
import argparse
import pickle
import cv2

# global variables
bg = None

#--------------------------------------------------
# To find the running average over the background
#--------------------------------------------------
def run_avg(image, aWeight):
    """Fold ``image`` into the module-level running-average background ``bg``.

    The first call seeds ``bg`` with a float copy of ``image``; every later
    call blends ``image`` into ``bg`` in place via
    ``cv2.accumulateWeighted`` using ``aWeight`` as the weight argument.

    Args:
        image: frame to add to the background model.
        aWeight: weight passed to ``cv2.accumulateWeighted``.

    Returns:
        None. The result is stored in the global ``bg``.
    """
    global bg

    if bg is not None:
        # background already seeded: accumulate the new frame into it
        cv2.accumulateWeighted(image, bg, aWeight)
        return

    # first frame seen: it becomes the initial background (as floats, so
    # later weighted accumulation has a floating-point destination)
    seed = image.copy()
    bg = seed.astype("float")

#---------------------------------------------
# To segment the region of hand in the image
#---------------------------------------------
def segment(image, threshold=25):
    """Separate the foreground (hand) from the running-average background.

    The frame is diffed against the module-level background ``bg``, so
    ``run_avg`` must have been called at least once before this function.

    Args:
        image: frame to segment; it must be comparable with ``bg`` by
            ``cv2.absdiff`` (the caller in this file passes the blurred
            grayscale region of interest).
        threshold: absolute-difference value above which a pixel is
            treated as foreground.

    Returns:
        ``None`` when no contour is found. Otherwise a tuple
        ``(mask, contour)``: ``mask`` is the binary threshold image
        replicated onto three channels, and ``contour`` is the contour
        with the largest area.
    """
    # find the absolute difference between background and current frame
    diff = cv2.absdiff(bg.astype("uint8"), image)

    # threshold the diff image so that we get the foreground
    thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)[1]
    backtorgb = cv2.merge([thresholded, thresholded, thresholded])

    # cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on OpenCV 2.x and 4.x; the contour list is always
    # the second-to-last element, so index it instead of unpacking a fixed
    # number of values.
    cnts = cv2.findContours(
        thresholded.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # nothing differs from the background: no hand in view
    if len(cnts) == 0:
        return None

    # based on contour area, take the largest contour as the hand
    segmented = max(cnts, key=cv2.contourArea)
    return (backtorgb, segmented)

def main():
    """Run live hand-gesture classification on the default webcam.

    Loads the Keras model and the label binarizer from hard-coded paths
    (the command-line options this script once had are disabled), uses the
    first frames of the region of interest to calibrate the background
    model, then classifies the thresholded hand mask of every later frame
    and overlays the rolling-average prediction on the video feed.
    Press "q" in an OpenCV window to quit.
    """
    # weight for the running background average
    aWeight = 0.5
    # number of initial frames used only to calibrate the background
    calibration_frames = 30

    print("[INFO] loading model and label binarizer...")
    model = load_model("model/activity.model")
    # NOTE: unpickling executes arbitrary code; only load a trusted file.
    with open("model/lb.pickle", "rb") as fh:
        lb = pickle.load(fh)

    # per-channel offset subtracted from the network input
    # (presumably the mean used when the model was trained; confirm)
    mean = np.array([123.68, 116.779, 103.939], dtype="float32")
    # rolling window of the most recent predictions, averaged for display
    Q = deque(maxlen=128)

    # get the reference to the webcam
    camera = cv2.VideoCapture(0)

    # region of interest (ROI) coordinates; the frame is flipped
    # horizontally below, hence "right" is the smaller x coordinate
    top, right, bottom, left = 10, 350, 225, 590

    # initialize num of frames
    num_frames = 0

    try:
        # keep looping, until interrupted
        while camera.isOpened():
            # get the current frame
            (grabbed, frame) = camera.read()

            # the camera stopped delivering frames: leave the loop instead
            # of passing a None frame to the resize below
            if not grabbed:
                break

            # resize the frame and flip it horizontally
            frame = imutils.resize(frame, width=700)
            frame = cv2.flip(frame, 1)

            # clone the frame; all drawing happens on the clone
            clone = frame.copy()

            # get the ROI, convert it to grayscale and blur it
            roi = frame[top:bottom, right:left]
            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            gray = cv2.GaussianBlur(gray, (7, 7), 0)

            if num_frames < calibration_frames:
                # still calibrating the running-average background model
                run_avg(gray, aWeight)
            else:
                # segment the hand region
                hand = segment(gray)

                # check whether hand region is segmented
                if hand is not None:
                    (thresholded, segmented) = hand

                    # draw the segmented region (shifted from ROI to frame
                    # coordinates) and show the threshold mask
                    cv2.drawContours(
                        clone, [segmented + (right, top)], -1, (0, 0, 255)
                    )
                    cv2.imshow("Thesholded", thresholded)

                    print('f', frame.shape[0], frame.shape[1], frame.shape[2])

                    # the network is fed the 3-channel threshold mask
                    net_input = cv2.resize(thresholded, (224, 224))
                    net_input = net_input.astype("float32")
                    net_input -= mean
                    preds = model.predict(
                        np.expand_dims(net_input, axis=0)
                    )[0]

                    # average over the recent predictions to reduce flicker
                    Q.append(preds)
                    results = np.array(Q).mean(axis=0)
                    i = np.argmax(results)

                    label = lb.classes_[i]
                    # report the averaged score of the class being shown,
                    # not the current-frame score of class 0
                    text = "{}: {:.2f}%".format(label, results[i] * 100)
                    cv2.putText(
                        clone, text, (35, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1.25, (0, 255, 0), 5,
                    )

            # draw the ROI rectangle
            cv2.rectangle(clone, (left, top), (right, bottom), (0, 255, 0), 2)

            # increment the number of frames
            num_frames += 1

            cv2.imshow("Video Feed", clone)
            # observe the keypress by the user
            keypress = cv2.waitKey(1) & 0xFF

            # if the user pressed "q", then stop looping
            if keypress == ord("q"):
                break
    finally:
        # free up the camera and windows even if the loop raised
        camera.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()