# Pull Request for PyVerse 💡

## Requesting to submit a pull request to the PyVerse repository.

### Issue Title
**Volume Control using Hand Gesture**

- [x] I have provided the issue title.

### Info about the Related Issue
**What's the goal of the project?**
The aim of this project is to create a gesture-based volume control system using computer vision and hand tracking. It lets users adjust their computer's volume simply by moving a hand in front of a webcam, providing an intuitive, touchless interface for audio control.

- [x] I have described the aim of the project.

### Name
**Miten Gandhi**

- [x] I have provided my name.

### GitHub ID
**mjgandhi2305**

- [x] I have provided my GitHub ID.

### Email ID
**mjgandhi2305@gmail.com**

- [x] I have provided my email ID.

### Identify Yourself
**GSSOC Extd., Hacktoberfest 2024**

- [x] I have mentioned my participant role.

### Closes
**Closes: #510**

- [x] I have provided the issue number.

### Describe the Add-ons or Changes You've Made
I have implemented a gesture-based volume control system with the following features:

1. Real-time hand detection and tracking using the MediaPipe library
2. A custom HandTrackingModule for gesture recognition
3. Volume control based on the distance between the thumb and index fingertips
4. Integration with the system audio controls using PyCaw
5. Visual feedback displaying the current volume level and hand landmarks
6. Smoothed volume transitions to prevent erratic changes

- [x] I have described my changes.

### Type of Change
- [x] New feature (non-breaking change which adds functionality)
- [x] This change requires a documentation update

### How Has This Been Tested?
The system has been tested through the following methods:

1. Manual testing with various hand positions and lighting conditions
2. Verification that volume changes accurately track the hand gestures
3. Performance testing to ensure smooth real-time operation
4. Edge-case testing (e.g., no hand in frame, multiple hands)
5. Testing on different Windows systems to ensure compatibility

- [x] I have described my testing process.

### Checklist
- [x] My code follows the guidelines of this project.
- [x] I have performed a self-review of my own code.
- [x] I have commented my code, particularly wherever it was hard to understand.
- [x] I have made corresponding changes to the documentation.
- [x] My changes generate no new warnings.
- [x] I have added tests that prove my fix is effective or that my feature works.
- [x] Any dependent changes have been merged and published in downstream modules.
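For reviewers: the code assumes `opencv-python`, `mediapipe`, `numpy`, `pycaw`, and `comtypes` are installed (the last two bind to Windows Core Audio, which is why testing was Windows-only). The heart of the approach is a linear map from fingertip distance to a quantized volume percentage; here is a minimal standalone sketch of just that mapping, using the same 50–200 px constants as the script in the diff below:

```python
import numpy as np

def distance_to_volume(length_px, smoothness=5):
    """Map a thumb-to-index pixel distance onto a quantized volume percentage."""
    # 50 px (pinched) -> 0%, 200 px (spread) -> 100%; np.interp clamps outside the range
    vol_per = np.interp(length_px, [50, 200], [0, 100])
    # Snap to 5% steps so frame-to-frame hand jitter does not flicker the volume
    return smoothness * round(vol_per / smoothness)

print(distance_to_volume(125))  # -> 50
```

The quantization is what the "smooth volume transitions" bullet refers to: rather than filtering over time, the raw percentage is snapped to the nearest 5% step.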
@@ -0,0 +1,90 @@
import cv2
import time
import numpy as np
import HandTrackingModule as htm
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

################################
wCam, hCam = 640, 480  # capture resolution
################################

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)
pTime = 0

detector = htm.handDetector(detectionCon=0.7, maxHands=1)

# Bind to the default speaker endpoint through PyCaw (Windows Core Audio)
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(
    IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()  # (min_dB, max_dB, step_dB)
minVol = volRange[0]
maxVol = volRange[1]
vol = 0
volBar = 400
volPer = 0
area = 0
colorVol = (255, 0, 0)  # BGR: blue while tracking, green while setting

while True:
    success, img = cap.read()
    if not success:
        continue  # skip frames the camera failed to deliver

    # Find the hand
    img = detector.findHands(img)
    lmList, bbox = detector.findPosition(img, draw=True)
    if len(lmList) != 0:

        # Filter on bounding-box size so distant hands are ignored
        area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) // 100
        if 250 < area < 1000:

            # Distance between thumb tip (landmark 4) and index tip (landmark 8)
            length, img, lineInfo = detector.findDistance(4, 8, img)

            # Map the pixel distance to the on-screen bar and a percentage
            volBar = np.interp(length, [50, 200], [400, 150])
            volPer = np.interp(length, [50, 200], [0, 100])

            # Quantize to 5% steps to keep hand jitter from flickering the volume
            smoothness = 5
            volPer = smoothness * round(volPer / smoothness)

            # Check which fingers are up
            fingers = detector.fingersUp()

            # Commit the volume only while the pinky is down
            if not fingers[4]:
                volume.SetMasterVolumeLevelScalar(volPer / 100, None)
                cv2.circle(img, (lineInfo[4], lineInfo[5]), 15, (0, 255, 0), cv2.FILLED)
                colorVol = (0, 255, 0)
            else:
                colorVol = (255, 0, 0)

    # Drawings
    cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)
    cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)
    cv2.putText(img, f'{int(volPer)} %', (40, 450), cv2.FONT_HERSHEY_COMPLEX,
                1, (255, 0, 0), 3)
    cVol = int(volume.GetMasterVolumeLevelScalar() * 100)
    cv2.putText(img, f'Vol Set: {int(cVol)}', (400, 50), cv2.FONT_HERSHEY_COMPLEX,
                1, colorVol, 3)

    # Frame rate
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv2.putText(img, f'FPS: {int(fps)}', (40, 50), cv2.FONT_HERSHEY_COMPLEX,
                1, (255, 0, 0), 3)

    cv2.imshow("Img", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
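One way to sanity-check the PyCaw half of this in isolation (useful given the Windows-only testing noted in the description) is to drive the audio endpoint without the camera loop. A sketch using only the calls already present in the script above; the 0.5 target is an arbitrary illustration, not a value from the project:

```python
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Grab the default speaker endpoint, same as the script above
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

print(volume.GetVolumeRange())              # (min_dB, max_dB, step_dB)
volume.SetMasterVolumeLevelScalar(0.5, None)  # set system volume to 50%
print(volume.GetMasterVolumeLevelScalar())    # -> 0.5
```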
@@ -0,0 +1,141 @@
import cv2
import mediapipe as mp
import time
import math


class handDetector:
    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplex = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        model_complexity=self.modelComplex,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]  # landmark ids of the five fingertips
        self.results = None
        self.lmList = []

    def findHands(self, img, draw=True):
        """Run MediaPipe hand detection on a BGR frame and optionally draw the landmarks."""
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return the pixel coordinates of the 21 landmarks and the hand's bounding box."""
        xList = []
        yList = []
        bbox = []
        self.lmList = []

        if self.results and self.results.multi_hand_landmarks:
            if len(self.results.multi_hand_landmarks) > handNo:
                myHand = self.results.multi_hand_landmarks[handNo]

                for id, lm in enumerate(myHand.landmark):
                    h, w, c = img.shape
                    # Landmarks are normalized to [0, 1]; scale them to pixels
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    xList.append(cx)
                    yList.append(cy)
                    self.lmList.append([id, cx, cy])

                    if draw:
                        # Draw a circle for each landmark
                        cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

        if xList and yList:
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            bbox = xmin, ymin, xmax, ymax

            if draw:
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[2] + 20, bbox[3] + 20), (0, 255, 0), 2)

        return self.lmList, bbox

    def fingersUp(self):
        """Return flags in [thumb, index, middle, ring, pinky] order: 1 = raised, 0 = folded."""
        fingers = []
        if len(self.lmList) >= 21:  # check that we have all landmarks
            # Thumb: compare x of the tip (4) against the joint below it
            if self.lmList[self.tipIds[0]][1] > self.lmList[self.tipIds[0] - 1][1]:
                fingers.append(1)
            else:
                fingers.append(0)
            # Other four fingers: tip above the joint two landmarks below it
            for id in range(1, 5):
                if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers

    def findDistance(self, p1, p2, img, draw=True):
        """Return the pixel distance between landmarks p1 and p2, the annotated image,
        and the endpoint/midpoint coordinates."""
        if len(self.lmList) > max(p1, p2):
            x1, y1 = self.lmList[p1][1], self.lmList[p1][2]
            x2, y2 = self.lmList[p2][1], self.lmList[p2][2]
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

            if draw:
                cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
                cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
                cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
                cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

            length = math.hypot(x2 - x1, y2 - y1)
            return length, img, [x1, y1, x2, y2, cx, cy]
        # Not enough landmarks: return a zero distance so callers can still unpack the result
        return 0, img, [0, 0, 0, 0, 0, 0]


def main():
    pTime = 0
    cap = cv2.VideoCapture(0)  # index 0 selects the default camera
    detector = handDetector()

    while True:
        success, img = cap.read()
        if not success:
            print("Ignoring empty frame from the webcam.")
            break

        img = detector.findHands(img)
        lmList, bbox = detector.findPosition(img)

        if len(lmList) != 0:
            print(lmList[4])  # e.g. the thumb-tip entry [id, x, y]

        # Calculate FPS
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime

        # Display FPS on the image
        cv2.putText(img, f'FPS: {int(fps)}', (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        # Show the image
        cv2.imshow("Image", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
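As a reading aid for the gating in the main script: `fingersUp()` returns one flag per digit in `[thumb, index, middle, ring, pinky]` order, and the volume is committed only while the pinky flag is 0. A toy illustration with hardcoded values (not taken from a real frame):

```python
fingers = [1, 1, 0, 0, 0]  # thumb and index raised for the pinch, pinky folded
if not fingers[4]:
    print("pinky down: commit the pinched volume")  # this branch runs
```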