###############################################################################
### Simple demo on displaying 3D hand/body skeleton
### Input : Live video of hand/body
### Output: 3D display of hand/body skeleton
### Usage : python 08_skeleton_3D.py -m hand
###       : python 08_skeleton_3D.py -m body
###       : python 08_skeleton_3D.py -m holistic
###############################################################################
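# Note: this demo assumes the companion modules utils_display.py and
# utils_mediapipe.py from this repo are importable, and that opencv-python,
# mediapipe and open3d are installed (the 3D window driven through disp.vis
# below appears to be an Open3D Visualizer)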
import cv2
import time
import argparse
# import numpy as np
from utils_display import DisplayHand, DisplayBody, DisplayHolistic
from utils_mediapipe import MediaPipeHand, MediaPipeBody, MediaPipeHolistic
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--mode', default='hand', choices=['hand', 'body', 'holistic'],
    help='Select mode: hand / body / holistic')
args = parser.parse_args()
mode = args.mode
# Start video capture
cap = cv2.VideoCapture(0) # By default webcam is index 0
# cap = cv2.VideoCapture('../data/video.mp4') # Read from .mp4 file
# Read in sample image to estimate camera intrinsic
ret, img = cap.read()
if not ret:
    raise IOError('Failed to read a sample frame from the capture device')
# img = cv2.resize(img, None, fx=0.5, fy=0.5)
img_width = img.shape[1]
img_height = img.shape[0]
intrin = {
    'fx': img_width*0.9, # Approx 0.7w < f < w https://www.learnopencv.com/approximate-focal-length-for-webcams-and-cell-phone-cameras/
    'fy': img_width*0.9,
    'cx': img_width*0.5, # Approx center of image
    'cy': img_height*0.5,
    'width': img_width,
    'height': img_height,
}
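# The rough pinhole intrinsics above are passed to both the estimator and the
# display classes, presumably to back-project MediaPipe's normalized landmarks
# into 3D camera coordinates (see utils_mediapipe.py for the actual usage)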
# Load mediapipe and display class
if mode=='hand':
    pipe = MediaPipeHand(static_image_mode=False, max_num_hands=2, intrin=intrin)
    disp = DisplayHand(draw3d=True, draw_camera=True, max_num_hands=2, intrin=intrin)
elif mode=='body':
    # Note: As of version 0.8.3 3D joint estimation is only available in full body mode
    pipe = MediaPipeBody(static_image_mode=False, model_complexity=1, intrin=intrin)
    disp = DisplayBody(draw3d=True, draw_camera=True, intrin=intrin)
elif mode=='holistic':
    # Note: As of version 0.8.3 3D joint estimation is only available in full body mode
    pipe = MediaPipeHolistic(static_image_mode=False, model_complexity=1, intrin=intrin)
    disp = DisplayHolistic(draw3d=True, draw_camera=True, intrin=intrin)
# log = False
# count = 0
# cap.set(cv2.CAP_PROP_POS_FRAMES, 900)
prev_time = time.time()
while cap.isOpened():
    ret, img = cap.read()
    if not ret:
        cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # Loop back
        ret, img = cap.read()
        # break
    # Flip image for 3rd person view
    img = cv2.flip(img, 1)
    # img = cv2.resize(img, None, fx=0.5, fy=0.5)
    # To improve performance, optionally mark image as not writeable to pass by reference
    img.flags.writeable = False
    # Feedforward to extract keypoint
    param = pipe.forward(img)
    # Compute FPS
    curr_time = time.time()
    fps = 1/(curr_time-prev_time)
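    # pipe.forward() appears to return a single dict in body mode but a list
    # of dicts in hand and holistic modes (cf. param[0] and the loop below),
    # so the FPS value is attached differently per mode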
    if mode=='body':
        param['fps'] = fps
    elif mode=='hand':
        param[0]['fps'] = fps
    elif mode=='holistic':
        for p in param:
            p['fps'] = fps
    prev_time = curr_time
    img.flags.writeable = True
    # Display keypoint
    cv2.imshow('img 2D', disp.draw2d(img, param))
    # Display 3D
    disp.draw3d(param, img)
    disp.vis.update_geometry(None)
    disp.vis.poll_events()
    disp.vis.update_renderer()
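    # update_geometry() / poll_events() / update_renderer() is the usual
    # Open3D pattern for refreshing a non-blocking visualizer window each
    # frame, assuming disp.vis is an open3d.visualization.Visualizer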
    # if log:
    #     img = (np.asarray(disp.vis.capture_screen_float_buffer())*255).astype(np.uint8)
    #     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    #     cv2.imwrite('../data/image/'+str(count).zfill(2)+'.png', img)
    #     count += 1
    key = cv2.waitKey(1)
    if key==27: # Press Esc to quit
        break
    if key==ord('r'): # Press 'r' to reset camera view
        disp.camera.reset_view()
    # if key==32: # Press spacebar to start logging images
    #     log = not log
    #     print('Log', log)
pipe.pipe.close() # Release MediaPipe solution resources
cap.release()
cv2.destroyAllWindows()