|
2 | 2 | import mediapipe as mp |
3 | 3 | import time |
4 | 4 |
|
5 | | -# 打开摄像头 |
6 | | -cap = cv2.VideoCapture(0) # 0是默认摄像头 |
7 | | - |
8 | | -# 初始化手部检测模块 |
9 | | -mpHands = mp.solutions.hands # 引用 MediaPipe 的手部解决方案模块 |
10 | | -hands = mpHands.Hands() # 创建一个 Hands 对象,用于检测和跟踪手部关键点 |
11 | | -mpDraw = mp.solutions.drawing_utils # 引用绘图工具,用于在图像上绘制检测到的手部关键点和连接线 |
12 | | - |
13 | | -# 初始化时间变量用于计算帧率 |
14 | | -pTime = 0 # 表示前一帧的时间,previous time |
15 | | -cTime = 0 # 表示当前帧的时间,current time |
16 | | -# cTime - pTime 计算时间差,从而计算帧率。最后将 cTime 赋值给 pTime,以便在下一次循环时使用 |
| 5 | +# Open the camera |
| 6 | +cap = cv2.VideoCapture(0) # 0 is the default camera |
| 7 | + |
| 8 | +# Initialize the hand detection module |
| 9 | +mpHands = mp.solutions.hands # Reference MediaPipe's hand solution module |
| 10 | +hands = ( |
| 11 | + mpHands.Hands() |
| 12 | +) # Create a Hands object for detecting and tracking hand landmarks |
| 13 | +mpDraw = ( |
| 14 | + mp.solutions.drawing_utils |
| 15 | +) # Reference drawing tools for drawing detected hand landmarks and connections |
| 16 | + |
| 17 | +# Initialize time variables for calculating FPS |
| 18 | +pTime = 0 # Timestamp of the previous frame |
| 19 | +cTime = 0 # Timestamp of the current frame |
| 20 | +# cTime - pTime calculates the time difference to compute FPS. Finally, cTime is assigned to pTime for use in the next loop |
17 | 21 |
|
18 | 22 | while True: |
19 | | - # 读取摄像头图像 |
| 23 | + # Read the camera image |
20 | 24 | success, img = cap.read() |
21 | | - # success:一个布尔值,表示是否成功读取帧 |
22 | | - # img:读取的图像帧,如果读取失败,这个值可能为空 |
| 25 | + # success: Boolean indicating if the frame was read successfully |
| 26 | + # img: The image frame that was read; it is None if reading fails |
23 | 27 |
|
24 | | - # 水平翻转图像 |
| 28 | + # Flip the image horizontally |
25 | 29 | img = cv2.flip(img, 1) |
26 | | - # 第 0 维表示垂直方向(高度),对应图像的行数,上下 |
27 | | - # 第 1 维表示水平方向(宽度),对应图像的列数,左右 |
| 30 | + # Dimension 0 represents the vertical direction (height), corresponding to the number of rows in the image, top to bottom |
| 31 | + # Dimension 1 represents the horizontal direction (width), corresponding to the number of columns in the image, left to right |
28 | 32 |
|
29 | | - # 将图像从 BGR 格式转换为 RGB 格式 |
| 33 | + # Convert the image from BGR format to RGB format |
30 | 34 | imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) |
31 | | - # BGR 是图像在 OpenCV 中的默认颜色格式,代表蓝色(Blue)、绿色(Green)、红色(Red)。这种格式与通常使用的 RGB(红、绿、蓝)顺序相反。转换为 RGB 是因为许多图像处理库(如 MediaPipe)使用这种格式进行处理 |
| 35 | + # BGR (Blue, Green, Red) is the default color order in OpenCV. It is the reverse of the more common RGB (Red, Green, Blue) order. The image is converted to RGB because MediaPipe expects RGB input |
32 | 36 |
|
33 | | - # 处理图像以检测手部 |
| 37 | + # Process the image to detect hands |
34 | 38 | results = hands.process(imgRGB) |
35 | 39 |
|
36 | | - # 如果检测到手部 |
| 40 | + # If hands are detected |
37 | 41 | if results.multi_hand_landmarks: |
38 | | - # 遍历检测到的每只手 |
| 42 | + # Iterate over each detected hand |
39 | 43 | for handLms in results.multi_hand_landmarks: |
40 | | - # results.multi_hand_landmarks 会返回一个列表,其中包含检测到的每只手的关键点信息。如果检测到多只手,它会包含多个元素,每个元素代表一只手的所有关键点 |
| 44 | + # results.multi_hand_landmarks returns a list containing landmark information for each detected hand. If multiple hands are detected, it contains multiple elements, each representing all landmarks of one hand |
41 | 45 |
|
42 | | - # 遍历手部关键点 |
43 | | - for id, lm in enumerate(handLms.landmark): |
44 | | - # enumerate 返回一个迭代器,每次迭代返回一个包含索引和值的元组 |
45 | | - # id 是手部关键点的索引,lm 是 landmark 的缩写,表示手部关键点的坐标信息 |
| 46 | + # Iterate over hand landmarks |
| 47 | + for id, lm in enumerate(handLms.landmark): |
| 48 | + # enumerate returns an iterator; each iteration yields a tuple containing the index and the value |
| 49 | + # id is the index of the hand landmark, lm is short for landmark, representing the coordinate information of the hand landmark |
46 | 50 |
|
47 | | - # 获取图像的尺寸 |
48 | | - h, w, c = (img.shape) |
49 | | - # img.shape 返回一个包含图像维度的元组,具体包括:高度(行数)、宽度(列数)、通道数(如 RGB 图像的通道数为 3) |
| 51 | + # Get the dimensions of the image |
| 52 | + h, w, c = img.shape |
| 53 | + # img.shape returns a tuple containing the image dimensions: height (number of rows), width (number of columns), and number of channels (e.g., 3 for RGB images) |
50 | 54 |
|
51 | | - # 计算关键点在图像中的坐标 |
| 55 | + # Calculate the coordinates of the landmark in the image |
52 | 56 | cx, cy = int(lm.x * w), int(lm.y * h) |
53 | | - # lm.x 和 lm.y 是关键点的归一化坐标,范围在 0 到 1 之间。通过乘以图像的宽度和高度,可以将它们转换为图像中的像素坐标 |
| 57 | + # lm.x and lm.y are the normalized coordinates of the landmark, ranging from 0 to 1. By multiplying by the image width and height, they can be converted to pixel coordinates in the image |
54 | 58 |
|
55 | 59 | print(id, cx, cy) |
56 | 60 |
|
57 | | - # 在关键点处画一个圆圈 |
| 61 | + # Draw a circle at the landmark |
58 | 62 | cv2.circle(img, (cx, cy), 15, (255, 0, 255), -1) |
59 | | - # img 表示要绘制图像的地方,(cx, cy) 圆心的坐标,15 圆的半径 |
60 | | - # (255, 0, 255) 圆的颜色(BGR格式),这里是紫色 |
61 | | - # 红色:(0, 0, 255)、绿色:(0, 255, 0)、蓝色:(255, 0, 0)、黄色:(0, 255, 255)、青色:(255, 255, 0)、品红:(255, 0, 255)、白色:(255, 255, 255)、黑色:(0, 0, 0) |
62 | | - # cv2.FILLED 或 -1 填充圆的实心样式,也可以为具体的数字(值为边框厚度) |
| 63 | + # img is the image to draw on, (cx, cy) is the center of the circle, 15 is the radius |
| 64 | + # (255, 0, 255) is the color of the circle (BGR format), which is magenta here |
| 65 | + # Red: (0, 0, 255), Green: (0, 255, 0), Blue: (255, 0, 0), Yellow: (0, 255, 255), Cyan: (255, 255, 0), Magenta: (255, 0, 255), White: (255, 255, 255), Black: (0, 0, 0) |
| 66 | + # cv2.FILLED or -1 for a filled circle, or a specific number for border thickness |
63 | 67 |
|
64 | | - # 绘制手部关键点和连接线 |
| 68 | + # Draw hand landmarks and connections |
65 | 69 | mpDraw.draw_landmarks(img, handLms, mpHands.HAND_CONNECTIONS) |
66 | | - # img 要绘制的图像,handLms 手部关键点的坐标 |
67 | | - # mpHands.HAND_CONNECTIONS 定义手部关键点之间的连接关系,用于绘制骨架结构 |
| 70 | + # img is the image to draw on, handLms are the coordinates of the hand landmarks |
| 71 | + # mpHands.HAND_CONNECTIONS defines the connections between hand landmarks for drawing the skeleton structure |
68 | 72 |
|
69 | | - # 计算帧率 |
| 73 | + # Calculate FPS |
70 | 74 | cTime = time.time() |
71 | 75 | fps = 1 / (cTime - pTime) |
72 | 76 | pTime = cTime |
73 | 77 |
|
74 | | - # 在图像上显示帧率 |
75 | | - cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3) |
76 | | - # img 要绘制文本的地方,str(int(fps)) 要显示的文本内容,这里是帧率的整数部分 |
77 | | - # (10, 70) 文本的左下角坐标,cv2.FONT_HERSHEY_PLAIN 字体样式 |
78 | | - # 3 字体大小,(255, 0, 255) 文本颜色(紫色,BGR格式),3 文本的粗细 |
79 | | - # cv2.putText 不支持关键字传参,必须按照顺序提供参数 |
80 | | - |
81 | | - # 显示图像 |
82 | | - cv2.imshow("Image", img) # 在窗口中显示图像,窗口标题为“Image” |
83 | | - cv2.waitKey(1) # 等待键盘事件,参数为 1 表示等待 1 毫秒 |
84 | | - # 它也允许图像窗口响应用户输入(如关闭窗口) |
85 | | - |
86 | | - # 检测退出键 |
87 | | - if cv2.waitKey(1) & 0xFF == ord("q"): # ord('q') 获取字符 'q' 的 ASCII 值 |
88 | | - # cv2.waitKey(1) & 0xFF 用来读取键盘输入 |
89 | | - # cv2.waitKey(1) 返回的是一个 32 位整数,其中低 8 位是实际的键值,& 0xFF 是一个位运算,用于提取这 8 位 |
90 | | - # "低 8 位"指的是一个数值的二进制表示中最右边的 8 位。这些位表示数值的较小部分,与"高 8 位"(最左边的 8 位)相对,后者表示数值的较大部分。对于 32 位整数来说,低 8 位用于表示键盘输入的实际键值 |
| 78 | + # Display FPS on the image |
| 79 | + cv2.putText( |
| 80 | + img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3 |
| 81 | + ) |
| 82 | + # img is where to draw the text, str(int(fps)) is the text content to display, which is the integer part of FPS |
| 83 | + # (10, 70) is the bottom-left corner of the text, cv2.FONT_HERSHEY_PLAIN is the font style |
| 84 | + # 3 is the font size, (255, 0, 255) is the text color (purple, BGR format), 3 is the thickness of the text |
| 85 | + # The arguments are passed positionally here, in the order: img, text, org, fontFace, fontScale, color, thickness |
| 86 | + |
| 87 | + # Show the image |
| 88 | + cv2.imshow("Image", img) # Display the image in a window titled "Image" |
| 89 | + cv2.waitKey( |
| 90 | + 1 |
| 91 | + ) # Wait for a keyboard event; a parameter of 1 means to wait for 1 millisecond |
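| 92 | + # This also allows the image window to respond to user input (such as closing the window). Note that a key pressed during this call is consumed here, so the exit check below will not see it |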
| 93 | + |
| 94 | + # Detect exit key |
| 95 | + if cv2.waitKey(1) & 0xFF == ord( |
| 96 | + "q" |
| 97 | + ): # ord('q') gets the ASCII value of the character 'q' |
| 98 | + # cv2.waitKey(1) & 0xFF is used to read keyboard input |
| 99 | + # cv2.waitKey(1) returns a 32-bit integer, where the lower 8 bits are the actual key value, and & 0xFF is a bitwise operation to extract these 8 bits |
| 100 | + # "Lower 8 bits" refers to the rightmost 8 bits in the binary representation of a number. These bits represent the smaller portion of the value, as opposed to the "higher 8 bits" (leftmost 8 bits), which represent the larger portion. For a 32-bit integer, the lower 8 bits are used to represent the actual key value from keyboard input |
91 | 101 | break |
92 | 102 |
|
93 | | -cap.release() # 释放摄像头资源 |
94 | | -cv2.destroyAllWindows() # 关闭所有 OpenCV 窗口 |
| 103 | +cap.release() # Release the camera resource |
| 104 | +cv2.destroyAllWindows() # Close all OpenCV windows |
0 commit comments