# raspberryface.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Asif Khan
# June 07, 2024

# SOURCE: 
# https://hub.fgit.cf/PINTO0309/PINTO_model_zoo
# https://github.com/PINTO0309/PINTO_model_zoo/issues/138
# Compare the feature vectors generated by
# w600k_r50_float32.onnx and w600k_r50_float32.tflite

"""
DEMO OF COMBINED 
SCRFD.TFLITE FACE DETECTOR
w600k_r50.TFLITE FACE RECOGNIZER
"""

import os
import cv2
import copy
import time
import argparse
import libcamera# Required for Raspberry Pi Camera
from picamera2 import Picamera2# Required for Raspberry Pi Camera
import numpy as np
import pandas as pd
import tkinter as tk
#import tensorflow as tf
import tflite_runtime.interpreter as tflite

from scrfd.scrfd_tflite import SCRFD# scrfd face detector (tlite version)
from onnx_insightface import norm_crop as alignment# alignment of face

# Script-wide settings.
vectorizer = 'BFL'  # represents buffalo from deepinsight/insightface/model_zoo
dir_models = './models'  # directory that holds the pre-trained models
write_faces_to_disk = True  # when True, a crop of every processed face is saved

if write_faces_to_disk:
    # Create the output directory for face crops. exist_ok=True makes this
    # idempotent and avoids the check-then-create race of testing
    # os.path.exists() first.
    os.makedirs("faces_written", exist_ok=True)


# Bring up the Raspberry Pi camera with a horizontally flipped preview stream.
picam2 = Picamera2()
# configure to get RGB image
# BEWARE: {'format': 'BGR888'} = RGB in OpenCV
_main_stream = {'format': 'BGR888'}
_mirror_transform = libcamera.Transform(hflip=1)
config = picam2.create_preview_configuration(_main_stream, transform=_mirror_transform)
picam2.configure(config)
#picam2.start_preview(Preview.QTGL)
#picam2.set_controls({'ExposureTime': 100000})
# Start streaming; frames are pulled later with picam2.capture_array().
picam2.start()

'''
# print controls of the camera
metadata = picam2.capture_metadata()
controls = {c: metadata[c] for c in ["ExposureTime", "AnalogueGain", "ColourGains"]}
print(\ncontrols of the camera:\n)
picam2.set_controls(controls)
'''

# LOAD ENROLLED IDENTITIES
# Two parallel stores are read from the current working directory:
#   * df_id_names    - DataFrame of identity names; the main loop reads its
#                      "FirstName" column by row position.
#   * np_id_fvectors - array of feature vectors; the row position of the
#                      nearest vector is used to index df_id_names, so both
#                      stores are expected to keep the same row order.
# NOTE(review): pd.read_pickle unpickles the file, which can execute arbitrary
# code - only load an id_names.pkl that this application wrote itself.
print("\n\nLoading database of enrolled identities...")
df_id_names = pd.read_pickle("id_names.pkl")
np_id_fvectors = np.load("scrfd_500m_bnkps_480x640_model_float16_quant_w600k_r50_float32_features.npy")
#print(df_id_names.head())# print names of identities
#print(np_id_fvectors.shape)# print feature vectors of identities
    
def enroll_identity(fv):
    '''
    GUI to enroll new identity.

    Opens a small Tk form asking for first, middle and last name plus a
    number. When "Enroll" is clicked with a valid name, the identity is
    appended to the module-level ``df_id_names`` table and ``fv`` is appended
    to the module-level ``np_id_fvectors`` array; both are written to disk
    ("id_names.pkl" and the ``..._features.npy`` file) and the window closes.

    fv: feature vector of the face to enroll. It is stacked under the existing
        vectors with np.vstack, so it must have the same length as they do.
        When it is None nothing is shown and the function returns at once.

    Returns None. Blocks until the window is closed.
    '''
    if fv is None:
        # Nothing to enroll - avoid opening a form that can only fail.
        print("No feature vector available; nothing to enroll.")
        return

    # create root window, its title and geometry (width x height)
    root = tk.Tk()
    root.title("Welcome to the club.")
    root.geometry('350x150')

    # One label + entry field per grid row, in display order.
    form_fields = (
        ("fname", "First Name:\t"),
        ("mname", "Middle Name:\t"),
        ("lname", "Last Name:\t"),
        ("number", "Number:\t"),
    )
    entries = {}
    for row, (key, caption) in enumerate(form_fields):
        tk.Label(root, text=caption).grid(column=0, row=row)
        entry = tk.Entry(root, width=20)
        entry.grid(column=1, row=row)
        entries[key] = entry

    # status line used for validation feedback (same row as the button)
    lbl_message = tk.Label(root, text="")
    lbl_message.grid(column=0, row=len(form_fields))

    def clicked():
        '''Validate the form and persist the new identity.'''
        global np_id_fvectors

        # Strip surrounding whitespace so that blank-looking input cannot
        # pass the minimum-length check below.
        fname = entries["fname"].get().strip()
        mname = entries["mname"].get().strip()
        lname = entries["lname"].get().strip()
        number = entries["number"].get().strip()

        if len(fname) < 2 or len(lname) < 2:
            lbl_message.configure(text="invalid Name.")
            return

        # Numbers shorter than 5 characters are stored as the placeholder -1.
        names = [fname, mname, lname, number if len(number) >= 5 else -1]
        print("names: ", names)

        # Stack first: if the vector does not fit the existing array this
        # raises before anything has been modified or written, so the name
        # table and the vector file cannot get out of step.
        fvs = np.vstack([np_id_fvectors, fv])

        # Appends one row; assumes the table has exactly four columns
        # matching `names` (the schema is defined by the stored pickle).
        df_id_names.loc[len(df_id_names.index)] = names
        df_id_names.to_pickle("id_names.pkl")
        # np.save appends the ".npy" suffix itself.
        np.save("scrfd_500m_bnkps_480x640_model_float16_quant_w600k_r50_float32_features", fvs)

        print(fname + " " + " " + mname + " " + lname + " " + number)

        # The in-memory copies already hold exactly what was written, so the
        # database does not need to be read back from disk.
        np_id_fvectors = fvs

        root.destroy()

    # button widget with black color text inside
    btn = tk.Button(root, text="Enroll", fg="black", command=clicked)
    btn.grid(column=1, row=len(form_fields))
    # Execute Tkinter
    root.mainloop()

def generate_writing_crop(imag, bounding_box, first_name):
    '''
    generate a larger crop of face and write it to disk

    The crop starts half a box-width/height before the top-left corner of the
    bounding box and spans twice the box size, clipped to the image.

    imag:         image array indexed as [row, column, ...].
    bounding_box: sequence of five values (x1, y1, x2, y2, extra); the fifth
                  value is ignored.
    first_name:   used as the file-name prefix.

    The file is written as ./faces_written/<first_name>_<YYYYmmdd_HHMMSS>.png,
    so crops saved under the same name within the same second overwrite each
    other. Nothing is written when the clipped crop is empty. Returns None.
    '''
    x1, y1, x2, y2, _ = bounding_box
    # Use the image's own size instead of relying on module-level frame
    # dimensions being set by the caller.
    img_h, img_w = imag.shape[:2]

    box_w = x2 - x1  # width of bounding box
    box_h = y2 - y1  # height of bounding box

    left = max(0, int(x1 - (box_w / 2)))
    top = max(0, int(y1 - (box_h / 2)))
    # Slice ends are exclusive, so clipping to the full width/height keeps
    # the last column/row of the image available to the crop.
    right = min(int(left + (2 * box_w)), img_w)
    bottom = min(int(top + (2 * box_h)), img_h)

    writing_crop = imag[top:bottom, left:right]
    if writing_crop.size == 0:
        # Degenerate or out-of-frame box: cv2.imwrite raises on an empty image.
        return

    time_string = time.strftime("%Y%m%d_%H%M%S")  # local time
    cv2.imwrite(f"./faces_written/{first_name}_{time_string}.png", writing_crop)
        
'''
# CREATE A PICKLE FILE TO ENROLL IDENTITIES
# method 1
fv = []# feature vector
fv.append(np.random.random_sample((112)).astype(np.float32))
data = [['first', 'last', fv]]
df = pd.DataFrame(data, columns=['FirstName', 'LastName', 'FeatureVector'])
#df.to_csv("test_file.csv", index=False)
df.to_pickle("scrfd_500m_bnkps_480x640_model_float16_quant_w600k_r50_float32_features.pkl")
'''
'''
# method 2
id_data = [['first', 'last'], ['firs', 'las']]
df = pd.DataFrame(id_data, columns=['FirstName', 'LastName'])
#df.to_csv("test_file.csv", index=False)
df.to_pickle("id_names.pkl")

f1 = np.random.random_sample((512)).astype(np.float32)
f2 = np.random.random_sample((512)).astype(np.float32)
fvs = np.vstack([f1, f2])
np.save("fvs", fvs)

'''

def get_args():
    '''
    Parse the command-line options of the demo and return the namespace.

    Options (all optional): --device, --movie, --detmodel, --recmodel,
    --input_size, --score_th, --nms_th. Model paths are given relative to the
    models directory; --input_size is a comma-separated pair of integers.
    '''
    # (flag, value type, default)
    option_table = (
        ("--device", int, 0),
        ("--movie", str, None),
        ("--detmodel", str, 'tflite/scrfd_500m_bnkps_480x640_float16_quant.tflite'),
        ("--recmodel", str, 'onnx/w600k_r50_float32.tflite'),
        ("--input_size", str, '480,640'),
        ("--score_th", float, 0.5),
        ("--nms_th", float, 0.4),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type, fallback in option_table:
        cli.add_argument(flag, type=value_type, default=fallback)
    return cli.parse_args()

args = get_args()

# determine resolution of input image
# (two comma-separated integers; the detector is later called with the pair
# in reversed order)
input_size = [int(i) for i in args.input_size.split(',')]

# LOAD FACE DETECTION MODEL (scrfd)
print('\n\nLoading face detection model...')
detector = SCRFD(model_file=os.path.join(dir_models, args.detmodel), nms_thresh=args.nms_th, threads=3)
detector.prepare(-1)

# LOAD FACE RECOGNITION MODEL
# The model format is taken from the file extension of --recmodel.
print('\n\nLoading face recognition model...')
if vectorizer == 'BFL':
    model_format = args.recmodel.split('.')[-1]
    if model_format == 'onnx':
        # NOTE(review): this only creates `vectorizer_model`; the capture loop
        # in this file runs inference through `interpreter`, which is created
        # by the tflite branch only.
        from onnx_insightface import VectorizerModel
        vectorizer_model = VectorizerModel(os.path.join(dir_models, args.recmodel))
    elif model_format == 'tflite':
        # Honour --recmodel when the file exists. Otherwise fall back to the
        # path this script has always loaded, so a run without arguments
        # keeps working even though the --recmodel default points elsewhere.
        recmodel_path = os.path.join(dir_models, args.recmodel)
        if not os.path.isfile(recmodel_path):
            legacy_recmodel_path = "./models/tflite/w600k_r50_float32.tflite"
            print(f"Face Recognition Model: '{recmodel_path}' not found, "
                  f"using '{legacy_recmodel_path}'.")
            recmodel_path = legacy_recmodel_path
        interpreter = tflite.Interpreter(model_path=recmodel_path, num_threads=3)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    else:
        # Without a recognition model the capture loop cannot work; stop here
        # with a clear message instead of failing later on an undefined name.
        raise SystemExit('Face Recognition Model: Invalid Format.')

'''
# update and write to disk
df_id_names = df_id_names.head(2)
df_id_names.to_pickle("./id_names.pkl")
np_id_fvectors = np_id_fvectors[0:2]
np.save("./scrfd_500m_bnkps_480x640_model_float16_quant_w600k_r50_float32_features.npy", np_id_fvectors)
'''
#import sys
#sys.exit(1)

# Largest Euclidean distance between the query embedding (L2-normalised
# below) and an enrolled vector that is still accepted as a match.
MATCH_DISTANCE_THRESHOLD = 1.0
KEY_ENROLL = ord('e')  # 101: enroll the face of the current frame
KEY_QUIT = 27  # ESC
WINDOW_TITLE = 'AK Face Recognition'


def _draw_text(image, text, origin, scale, colour):
    '''Draw `text` on `image` (in place) with the overlay font used by this demo.'''
    cv2.putText(image, text, origin, cv2.FONT_HERSHEY_SIMPLEX, scale, colour, 1, cv2.LINE_AA)


def _embed_face(aligned_face):
    '''
    Run the TFLite recognition model on one aligned face crop and return its
    L2-normalised feature vector.

    Uses the module-level `interpreter`, `input_details` and `output_details`.
    '''
    # Scale pixel values from [0, 255] to [-1, 1] and add the batch axis.
    net_input = np.asarray((aligned_face - 127.5) / 127.5, dtype=np.float32)
    net_input = np.expand_dims(net_input, axis=0)
    interpreter.set_tensor(input_details[0]['index'], net_input)
    interpreter.invoke()
    embedding = interpreter.get_tensor(output_details[0]['index'])
    embedding = embedding / np.linalg.norm(embedding)
    return embedding[0]


def _nearest_identity(feature_vector, gallery):
    '''
    Return (row, distance) of the enrolled vector closest to `feature_vector`
    (Euclidean distance), or None when `gallery` holds no vectors.
    '''
    gallery = np.asarray(gallery)
    if gallery.size == 0:
        return None
    distances = np.linalg.norm(feature_vector - np.atleast_2d(gallery), axis=1)
    row = int(distances.argmin())
    return row, float(distances[row])


try:
    while True:
        frame_RGB = picam2.capture_array()  # RGB
        frame_BGR = cv2.cvtColor(frame_RGB, cv2.COLOR_RGB2BGR)  # for drawing/display
        frame_BGR_orig = frame_BGR.copy()  # clean copy, saved without overlays
        # Frame dimensions stay module-level names for any code elsewhere in
        # this file that reads them.
        frameH, frameW, frameC = frame_RGB.shape

        # Infer Face Detection
        start_time = time.perf_counter()
        bboxes, keypoints = detector.detect(
            frame_RGB,
            args.score_th,
            input_size=(input_size[1], input_size[0]),
        )
        elapsed_time_detection = time.perf_counter() - start_time

        # Reset every frame so that enrolling can never pick up a vector left
        # over from an earlier frame (or hit an undefined name).
        feature_vector_tflite = None

        # Proceed only if a face with landmarks is detected
        if len(bboxes) > 0 and keypoints is not None:
            # Only the first detection is processed.
            # NOTE(review): presumably the highest-scoring face - confirm
            # against the detector's output ordering.
            indx = 0

            # Generate recognition-crop as per insightface
            landmarks = keypoints[indx]
            recog_crop, _ = alignment(frame_RGB, landmarks)

            # Generate feature vector (TFLite)
            start_time = time.perf_counter()
            feature_vector_tflite = _embed_face(recog_crop)
            elapsed_time_vectorization = time.perf_counter() - start_time

            # Display feature vector extraction elapsed time
            _draw_text(
                frame_BGR,
                "Elapsed Time Vec: " + '{:.1f}'.format(elapsed_time_vectorization * 1000) + "ms",
                (10, 60), 0.4, (0, 255, 0))

            # Look up the closest enrolled identity.
            match = _nearest_identity(feature_vector_tflite, np_id_fvectors)
            if match is not None and match[1] < MATCH_DISTANCE_THRESHOLD:
                id_fname = df_id_names.iloc[match[0]]["FirstName"]
                label_colour = (0, 255, 0)
            else:
                id_fname = "?"
                label_colour = (0, 140, 255)

            # Display the name at the first landmark; cv2 expects a plain
            # tuple of ints as the text origin.
            label_origin = tuple(int(v) for v in keypoints[indx][0])
            _draw_text(frame_BGR, "{}".format(id_fname), label_origin, 0.7, label_colour)

            # Generate writing-crop
            if write_faces_to_disk:
                generate_writing_crop(frame_BGR_orig, bboxes[indx].astype(np.int64), id_fname)

        # Display detection inference elapsed time
        _draw_text(
            frame_BGR,
            "Elapsed Time Det: " + '{:.1f}'.format(elapsed_time_detection * 1000) + "ms",
            (10, 30), 0.4, (0, 255, 0))

        # Show the frame first, then poll the keyboard: waitKey also services
        # the window's events.
        cv2.imshow(WINDOW_TITLE, frame_BGR)
        key = cv2.waitKey(1) & 0xFF
        if key == KEY_ENROLL:  # Enroll Identity
            if feature_vector_tflite is None:
                print("No face detected in the current frame; nothing to enroll.")
            else:
                enroll_identity(feature_vector_tflite)
        elif key == KEY_QUIT:
            break
finally:
    # Release the camera and close the preview window on any exit path.
    picam2.stop()
    cv2.destroyAllWindows()