Volume Control Using Hand Gesture #530

Merged · 8 commits · Oct 14, 2024

Changes from all commits
90 changes: 90 additions & 0 deletions Computer Vision/Volume Control Using Hand Gesture/GestureVolume.py
@@ -0,0 +1,90 @@
import cv2
import time
import numpy as np
import HandTrackingModule as htm
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

################################
wCam, hCam = 640, 480
################################

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)
pTime = 0

detector = htm.handDetector(detectionCon=0.7, maxHands=1)

# pycaw setup (Windows Core Audio): grab the default speaker endpoint
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()  # (min dB, max dB, step); dB bounds for the non-scalar API
minVol = volRange[0]
maxVol = volRange[1]
vol = 0
volBar = 400
volPer = 0
area = 0
colorVol = (255, 0, 0)

while True:
    success, img = cap.read()
    if not success:
        continue

    # Find hand
    img = detector.findHands(img)
    lmList, bbox = detector.findPosition(img, draw=True)
    if len(lmList) != 0:

        # Filter based on bounding-box size so far-away hands are ignored
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
        if 250 < area < 1000:

            # Distance between thumb tip (4) and index tip (8)
            length, img, lineInfo = detector.findDistance(4, 8, img)

            # Map pinch distance to bar height and volume percentage
            volBar = np.interp(length, [50, 200], [400, 150])
            volPer = np.interp(length, [50, 200], [0, 100])

            # Quantize to steps of 5 % to smooth out jitter
            smoothness = 5
            volPer = smoothness * round(volPer / smoothness)

            # Check which fingers are up
            fingers = detector.fingersUp()

            # If the pinky is down, commit the volume
            if not fingers[4]:
                volume.SetMasterVolumeLevelScalar(volPer / 100, None)
                cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
                colorVol = (0, 255, 0)
            else:
                colorVol = (255, 0, 0)

    # Drawings
    cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)
    cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
    cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX,
                1, (255, 0, 0), 3)
    cVol = int(volume.GetMasterVolumeLevelScalar() * 100)
    cv2.putText(img, f'Vol Set: {cVol}', (400, 50), cv2.FONT_HERSHEY_COMPLEX,
                1, colorVol, 3)

    # Frame rate
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX,
                1, (255, 0, 0), 3)

    cv2.imshow("Img", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
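The core of GestureVolume.py is the pinch-to-volume mapping: np.interp clamps the thumb-index distance to [50, 200] pixels and interpolates linearly, and rounding to multiples of 5 quantizes away frame-to-frame jitter. A minimal sketch of just that mapping, using the same ranges as the script:

import numpy as np

smoothness = 5
for length in (30, 50, 125, 200, 260):
    volPer = np.interp(length, [50, 200], [0, 100])   # clamps outside the range
    volPer = smoothness * round(volPer / smoothness)  # snap to 5 % steps
    print(f'length={length:3.0f} -> {volPer:3.0f} %')
# length= 30 ->   0 %   (clamped low)
# length= 50 ->   0 %
# length=125 ->  50 %
# length=200 -> 100 %
# length=260 -> 100 %   (clamped high)

# minVol/maxVol from GetVolumeRange() are decibel bounds; they would feed the
# dB-based pycaw call instead of the 0-1 scalar one, e.g. (untested sketch):
# volume.SetMasterVolumeLevel(np.interp(length, [50, 200], [minVol, maxVol]), None)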
141 changes: 141 additions & 0 deletions Computer Vision/Volume Control Using Hand Gesture/HandTrackingModule.py
@@ -0,0 +1,141 @@
import cv2
import mediapipe as mp
import time
import math


class handDetector:
    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplex = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        model_complexity=self.modelComplex,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]  # landmark ids of the five fingertips
        self.results = None
        self.lmList = []

    def findHands(self, img, draw=True):
        # MediaPipe expects RGB; OpenCV delivers BGR
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        xList = []
        yList = []
        bbox = []
        self.lmList = []

        if self.results.multi_hand_landmarks:
            if len(self.results.multi_hand_landmarks) > handNo:
                myHand = self.results.multi_hand_landmarks[handNo]

                for id, lm in enumerate(myHand.landmark):
                    # Landmarks are normalized; scale to pixel coordinates
                    h, w, c = img.shape
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    xList.append(cx)
                    yList.append(cy)
                    self.lmList.append([id, cx, cy])

                    if draw:
                        # Draw a circle for each landmark
                        cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

                        # (Optional) highlight the thumb tip (id 4) with a red dot:
                        # if id == 4:
                        #     cv2.circle(img, (cx, cy), 15, (0, 0, 255), cv2.FILLED)

        if xList and yList:
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax

            if draw:
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[2] + 20, bbox[3] + 20), (0, 255, 0), 2)

        return self.lmList, bbox

    def fingersUp(self):
        fingers = []
        if len(self.lmList) >= 21:  # need the full set of 21 landmarks
            # Thumb: compare x of the tip (4) and the joint below it (3);
            # assumes a right hand facing the camera
            if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0] - 1][1]:
                fingers.append(1)
            else:
                fingers.append(0)
            # Other four fingers: tip above the PIP joint two ids below it
            for id in range(1, 5):
                if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers

    def findDistance(self, p1, p2, img, draw=True):
        if len(self.lmList) > max(p1, p2):  # both indices must exist in the list
            x1, y1 = self.lmList[p1][1], self.lmList[p1][2]
            x2, y2 = self.lmList[p2][1], self.lmList[p2][2]
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

            if draw:
                cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
                cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
                cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

            length = math.hypot(x2 - x1, y2 - y1)
            return length, img, [x1, y1, x2, y2, cx, cy]
        return None


def main():
    pTime = 0
    cap = cv2.VideoCapture(0)  # index 0 selects the default camera
    detector = handDetector()

    while True:
        success, img = cap.read()
        if not success:
            print("Empty frame from the webcam; stopping.")
            break

        img = detector.findHands(img)
        lmList, bbox = detector.findPosition(img)

        if len(lmList) != 0:
            print(lmList[4])  # example: position of the thumb tip

        # Calculate FPS
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        # Display FPS on the image
        cv2.putText(img, f'FPS: {int(fps)}', (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("Image", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
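HandTrackingModule is self-contained, so it can back other gesture scripts with the same three calls per frame: findHands, findPosition, then findDistance on any two landmark ids (4 and 8 are MediaPipe's thumb and index fingertips). A minimal sketch of a standalone pinch-distance readout, built only on the functions defined above:

import cv2
import HandTrackingModule as htm

cap = cv2.VideoCapture(0)
detector = htm.handDetector(maxHands=1, detectionCon=0.7)

while True:
    success, img = cap.read()
    if not success:
        break

    img = detector.findHands(img)
    lmList, bbox = detector.findPosition(img, draw=False)

    if lmList:
        result = detector.findDistance(4, 8, img)  # thumb tip to index tip
        if result is not None:                     # guards the short-list case
            length, img, lineInfo = result
            print(f'pinch distance: {length:.0f} px')

    cv2.imshow("Pinch", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()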