Skip to content

Commit 6e911d9

Browse files
committed
Project 1 Gesture Volume Control
1 parent 4e5f720 commit 6e911d9

File tree

5 files changed

+280
-0
lines changed

5 files changed

+280
-0
lines changed
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""
2+
Hand Tracking Module
3+
By: Murtaza Hassan
4+
Youtube: http://www.youtube.com/c/MurtazasWorkshopRoboticsandAI
5+
Website: https://www.computervision.zone
6+
7+
Modified by: Diraw
8+
Date: 20240811
9+
Description:
10+
1. Modified the initialization of the `Hands` object to use named parameters for better clarity and compatibility with the latest version of the mediapipe library. This change ensures that the parameters are correctly mapped to the expected arguments in the `Hands` class.
11+
2. Added a line to flip the image horizontally using `cv2.flip(img, 1)` to ensure the hand movements appear mirrored, which is more intuitive for user interaction.
12+
3. Added the code lmList = lmList[0] at line 59 in VolumeHandControl.py to fix the error: IndexError: tuple index out of range.
13+
"""
14+
15+
import cv2
16+
import mediapipe as mp
17+
import time
18+
import math
19+
20+
21+
# Hand detection class
22+
class handDetector:
    """Detect hands with MediaPipe Hands and expose landmark helpers.

    Per frame, call ``findHands`` first, then ``findPosition``. ``fingersUp``
    and ``findDistance`` read the landmark list stored by the most recent
    ``findPosition`` call.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        """Create the MediaPipe ``Hands`` object.

        Args:
            mode: Static mode flag (passed as ``static_image_mode``).
            maxHands: Maximum number of hands to detect.
            detectionCon: Detection confidence threshold.
            trackCon: Tracking confidence threshold.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        # Keyword arguments keep the mapping explicit across mediapipe versions.
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )

        self.mpDraw = mp.solutions.drawing_utils  # Drawing utilities
        self.tipIds = [4, 8, 12, 16, 20]  # Fingertip landmark IDs, thumb first

        # Defined up front so the helper methods are safe to call before the
        # first findHands / findPosition call.
        self.results = None
        self.lmList = []

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR image and optionally draw the hands.

        The detection result is stored in ``self.results`` for
        ``findPosition``.

        Args:
            img: BGR image (it is converted to RGB before processing).
            draw: When true, draw landmarks and connections onto ``img``.

        Returns:
            The same ``img`` object, annotated in place when ``draw`` is true.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(
                    img, handLms, self.mpHands.HAND_CONNECTIONS
                )
        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Convert one detected hand's landmarks to pixel coordinates.

        Args:
            img: Image whose ``shape`` gives (height, width, ...); it is drawn
                on when ``draw`` is true.
            handNo: Index of the hand within the latest detection result.
            draw: When true, draw each landmark and a padded bounding box.

        Returns:
            ``(lmList, bbox)``. ``lmList`` is a list of ``[id, x, y]`` entries
            and ``bbox`` is ``(xmin, ymin, xmax, ymax)``. Both are empty lists
            when no hand is available at ``handNo``.
        """
        self.lmList = []
        bbox = []

        # getattr keeps this safe even if findHands has not run yet.
        results = getattr(self, "results", None)
        hands = getattr(results, "multi_hand_landmarks", None)
        # Fewer hands than requested is a normal situation, not an error.
        if not hands or not -len(hands) <= handNo < len(hands):
            return self.lmList, bbox

        h, w = img.shape[:2]  # Image size is constant for the whole loop
        xList = []
        yList = []
        for idx, lm in enumerate(hands[handNo].landmark):
            # Landmarks are scaled by the image size to get pixel coordinates.
            cx, cy = int(lm.x * w), int(lm.y * h)
            xList.append(cx)
            yList.append(cy)
            self.lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 255), cv2.FILLED)

        if xList:
            bbox = min(xList), min(yList), max(xList), max(yList)
            if draw:
                # Pad the box by 20 px so it does not touch the landmarks.
                cv2.rectangle(
                    img,
                    (bbox[0] - 20, bbox[1] - 20),
                    (bbox[2] + 20, bbox[3] + 20),
                    (0, 255, 0),
                    2,
                )

        return self.lmList, bbox

    def fingersUp(self):
        """Report which fingers are extended, thumb first.

        Image coordinates have their origin at the top-left corner: x grows to
        the right and y grows downwards. The thumb test compares x values and
        was written by the author for a right hand in a horizontally flipped
        (mirrored) frame: the thumb counts as extended when landmark 4 lies to
        the left of landmark 3. The other fingers count as extended when the
        tip lies above the joint two landmarks before it.

        Returns:
            A list of five 0/1 flags, or an empty list when the stored
            landmark list does not contain all fingertip landmarks.
        """
        landmarks = self.lmList
        if len(landmarks) <= max(self.tipIds):
            return []

        thumbTip = self.tipIds[0]
        # Index 1 of an entry is x, index 2 is y (entries are [id, x, y]).
        fingers = [int(landmarks[thumbTip][1] < landmarks[thumbTip - 1][1])]
        fingers.extend(
            int(landmarks[tip][2] < landmarks[tip - 2][2])
            for tip in self.tipIds[1:]
        )
        return fingers

    def findDistance(self, p1, p2, img, draw=True):
        """Measure the pixel distance between two stored landmarks.

        Args:
            p1: Landmark index of the first point.
            p2: Landmark index of the second point.
            img: Image to draw on when ``draw`` is true.
            draw: When true, draw both points, the connecting line and the
                midpoint.

        Returns:
            ``(length, img, [x1, y1, x2, y2, cx, cy])`` where ``(cx, cy)`` is
            the integer midpoint.

        Raises:
            IndexError: If either landmark is missing from the stored list.
        """
        x1, y1 = self.lmList[p1][1], self.lmList[p1][2]
        x2, y2 = self.lmList[p2][1], self.lmList[p2][2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

        if draw:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)

        length = math.hypot(x2 - x1, y2 - y1)
        return length, img, [x1, y1, x2, y2, cx, cy]
131+
132+
133+
# Main function
134+
def main():
    """Run a webcam demo that prints the thumb-tip landmark and shows FPS.

    The loop ends when a frame cannot be read or when ``q`` is pressed in the
    preview window. The camera and window are always released on exit.
    """
    pTime = 0  # Previous frame time
    cap = cv2.VideoCapture(0)  # Open camera
    detector = handDetector()

    try:
        while True:
            success, img = cap.read()
            if not success:
                # Without a frame, cv2.flip would fail on None.
                print("Failed to capture image")
                break

            img = cv2.flip(img, 1)  # Mirror the frame horizontally
            img = detector.findHands(img)
            lmList, bbox = detector.findPosition(img)
            if lmList:
                print(lmList[4])  # Thumb tip: [id, x, y]

            cTime = time.time()
            elapsed = cTime - pTime
            # Two timestamps can be equal on coarse clocks; avoid dividing by 0.
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime

            cv2.putText(
                img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3
            )

            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()  # Run main function
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import cv2
2+
import time
3+
import numpy as np
4+
import HandTrackingModule as htm
5+
import math
6+
from ctypes import cast, POINTER
7+
from comtypes import CLSCTX_ALL
8+
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
9+
10+
# from pprint import pprint
11+
12+
# Requested camera capture resolution in pixels
wCam, hCam = 640, 480

# Initialize camera
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)

pTime = 0  # Time of the previous frame

# Initialize hand detector with detection confidence
detector = htm.handDetector(detectionCon=0.7)

# Get the speaker endpoint's volume interface
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))

# Get volume range; the author observed (-45.0, 0.0, 1.0) on their machine
volRange = volume.GetVolumeRange()
minVol = volRange[0]  # Minimum volume
maxVol = volRange[1]  # Maximum volume

# Thumb-to-index pixel distances mapped to minimum and maximum volume
lengthRange = [50, 300]

vol = 0  # Current volume
volBar = 400  # Volume bar position (400 is the bottom of the bar)
volPer = 0  # Volume percentage

try:
    while True:
        success, img = cap.read()  # Read camera image
        if not success:
            # Without a frame, cv2.flip would fail on None.
            print("Failed to capture image")
            break

        img = cv2.flip(img, 1)  # Mirror the frame horizontally
        img = detector.findHands(img)  # Detect hands
        # findPosition returns (landmarks, bounding box); only landmarks are used
        lmList, _ = detector.findPosition(img, draw=False)

        # Landmarks 4 (thumb tip) and 8 (index tip) must both be present
        if len(lmList) > 8:
            # Draws both fingertips, the connecting line and the midpoint,
            # and returns their pixel distance
            length, img, lineInfo = detector.findDistance(4, 8, img)
            cx, cy = lineInfo[4], lineInfo[5]

            # Convert distance to volume, bar height and percentage
            vol = np.interp(length, lengthRange, [minVol, maxVol])
            volBar = np.interp(length, lengthRange, [400, 150])
            volPer = np.interp(length, lengthRange, [0, 100])

            print(int(length), vol)

            # Set volume
            volume.SetMasterVolumeLevel(vol, None)

            # If distance is less than 50, change circle color
            if length < 50:
                cv2.circle(img, (cx, cy), 15, (0, 255, 0), cv2.FILLED)

        # Draw volume bar
        cv2.rectangle(img, (50, 150), (85, 400), (255, 0, 0), 3)  # Outer frame
        cv2.rectangle(img, (50, int(volBar)), (85, 400), (255, 0, 0), cv2.FILLED)  # Fill
        cv2.putText(
            img, f"{int(volPer)} %", (40, 450), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3
        )

        # Calculate frame rate
        cTime = time.time()
        elapsed = cTime - pTime
        # Two timestamps can be equal on coarse clocks; avoid dividing by 0.
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime

        # Display frame rate
        cv2.putText(
            img, f"FPS: {int(fps)}", (40, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 3
        )

        # Show image; press q in the window to quit
        cv2.imshow("Img", img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the preview window
    cap.release()
    cv2.destroyAllWindows()
Binary file not shown.
Binary file not shown.

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ This is the English version of the repository. You can find the Chinese version
2929
- Python: `3.8`
3030
- OpenCV: `4.10.0`
3131
- MediaPipe: `0.10.10`
32+
- Pycaw: `20240210`
3233

3334
## Conda Environment Setup
3435

@@ -37,4 +38,5 @@ conda create -n visionpy python=3.8
3738
conda activate visionpy
3839
pip install opencv-python==4.10.0
3940
pip install mediapipe==0.10.10
41+
pip install pycaw==20240210
4042
```

0 commit comments

Comments
 (0)