Skip to content

Commit 8117d4e

Browse files
committed
Chapter 3 Face Detection
1 parent 290cef2 commit 8117d4e

File tree

14 files changed

+242
-1
lines changed

14 files changed

+242
-1
lines changed

Chapter 2 Pose Estimation/tempCodeRunnerFile.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

Chapter 3 Face Detection/Basics.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import cv2
2+
import mediapipe as mp
3+
import time
4+
5+
# Face-detection demo: read a video file frame by frame, run MediaPipe face
# detection on each frame, draw a box plus confidence for every face, overlay
# an FPS estimate, and show the result until the video ends or "q" is pressed.

# Open the video file.
cap = cv2.VideoCapture(
    "E:\\Advance Computer Vision with Python\\Chapter 3 Face Detection\\Videos\\4.mp4"
)

pTime = 0  # Timestamp of the previous frame, used for the FPS estimate.

# Set up the MediaPipe face-detection module.
mpFaceDetection = mp.solutions.face_detection
mpDraw = mp.solutions.drawing_utils
# Create the detector with a 0.75 confidence threshold. The threshold is passed
# by keyword so the call does not depend on the constructor's parameter order.
faceDetection = mpFaceDetection.FaceDetection(min_detection_confidence=0.75)

# Create the resizable window once instead of on every frame.
cv2.namedWindow("Image", cv2.WINDOW_NORMAL)

try:
    while True:
        success, img = cap.read()  # Grab the next frame.
        if not success:
            print("Failed to read frame")
            break

        # The detector is fed RGB, while the frame is converted from BGR here.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = faceDetection.process(imgRGB)

        if results.detections:
            # Frame size is the same for every detection, so read it once.
            ih, iw = img.shape[:2]
            for detection in results.detections:
                # Alternative: mpDraw.draw_detection(img, detection) draws
                # MediaPipe's own bounding box and key points.

                # Scale the relative box by the frame size to get a pixel
                # (x, y, width, height) tuple.
                bboxC = detection.location_data.relative_bounding_box
                bbox = (
                    int(bboxC.xmin * iw),
                    int(bboxC.ymin * ih),
                    int(bboxC.width * iw),
                    int(bboxC.height * ih),
                )

                # Draw the bounding box.
                cv2.rectangle(img, bbox, (255, 0, 255), 2)

                # Show the detection confidence as a percentage above the box.
                # The first 5 is the font scale, the second 5 the thickness.
                cv2.putText(
                    img,
                    f"{int(detection.score[0] * 100)}%",
                    (bbox[0], bbox[1] - 20),
                    cv2.FONT_HERSHEY_PLAIN,
                    5,
                    (255, 0, 255),
                    5,
                )

        # Compute and display the frame rate. Guard against a zero interval,
        # which would otherwise raise ZeroDivisionError.
        cTime = time.time()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime
        cv2.putText(
            img, f"FPS: {int(fps)}", (20, 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0), 5
        )

        # Show the annotated frame; "q" quits.
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import cv2
2+
import mediapipe as mp
3+
import time
4+
5+
6+
# 定义一个人脸检测类
7+
class FaceDetector:
    """Detect faces in BGR frames with MediaPipe and optionally annotate them."""

    def __init__(self, minDetectionCon=0.5):
        """Create the underlying MediaPipe face detector.

        Args:
            minDetectionCon: Detection confidence threshold handed to
                MediaPipe's ``FaceDetection``.
        """
        self.minDetectionCon = minDetectionCon

        # Set up the MediaPipe face-detection module.
        self.mpFaceDetection = mp.solutions.face_detection
        self.mpDraw = mp.solutions.drawing_utils
        # Keyword argument keeps the call independent of parameter order.
        self.faceDetection = self.mpFaceDetection.FaceDetection(
            min_detection_confidence=self.minDetectionCon
        )

    def findFaces(self, img, draw=True):
        """Run face detection on one frame.

        Args:
            img: Frame to analyse; it is converted from BGR to RGB before
                being passed to the detector.
            draw: When true, draw a styled box and the confidence percentage
                for every detected face onto ``img``.

        Returns:
            A tuple ``(img, bboxs)`` where ``bboxs`` is a list of
            ``[index, (x, y, w, h), score]`` entries, one per detection, with
            the box expressed in pixels. The list is empty when nothing was
            detected. The raw detector output is kept in ``self.results``.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.faceDetection.process(imgRGB)
        bboxs = []

        if self.results.detections:
            # Frame size is the same for every detection, so read it once.
            ih, iw = img.shape[:2]
            for idx, detection in enumerate(self.results.detections):
                # Scale the relative box by the frame size to get pixels.
                bboxC = detection.location_data.relative_bounding_box
                bbox = (
                    int(bboxC.xmin * iw),
                    int(bboxC.ymin * ih),
                    int(bboxC.width * iw),
                    int(bboxC.height * ih),
                )
                bboxs.append([idx, bbox, detection.score])

                if draw:
                    img = self.fancyDraw(img, bbox)
                    # Confidence as a percentage, placed above the box.
                    cv2.putText(
                        img,
                        f"{int(detection.score[0] * 100)}%",
                        (bbox[0], bbox[1] - 20),
                        cv2.FONT_HERSHEY_PLAIN,
                        5,
                        (255, 0, 255),
                        5,
                    )
        return img, bboxs

    def fancyDraw(self, img, bbox, l=30, t=5, rt=1):
        """Draw a thin rectangle with thicker short strokes at its corners.

        Args:
            img: Image to draw on.
            bbox: Box as ``(x, y, w, h)``.
            l: Length of each corner stroke.
            t: Thickness of the corner strokes.
            rt: Thickness of the rectangle outline.

        Returns:
            The same ``img`` that was passed in, after drawing.
        """
        x, y, w, h = bbox
        x1, y1 = x + w, y + h
        color = (255, 0, 255)

        cv2.rectangle(img, bbox, color, rt)

        # Each corner gets one horizontal and one vertical stroke pointing
        # into the box; dx/dy give the inward direction for that corner.
        corners = (
            (x, y, 1, 1),  # top-left
            (x1, y, -1, 1),  # top-right
            (x, y1, 1, -1),  # bottom-left
            (x1, y1, -1, -1),  # bottom-right
        )
        for cx, cy, dx, dy in corners:
            cv2.line(img, (cx, cy), (cx + dx * l, cy), color, t)
            cv2.line(img, (cx, cy), (cx, cy + dy * l), color, t)

        return img
76+
77+
78+
def main():
    """Play the sample video with face boxes and an FPS overlay.

    Reads frames until the video ends (or a read fails) or the user presses
    "q", printing the detected bounding boxes for each frame.
    """
    # Open the video file.
    cap = cv2.VideoCapture(
        "E:\\Advance Computer Vision with Python\\Chapter 3 Face Detection\\Videos\\4.mp4"
    )
    pTime = 0  # Timestamp of the previous frame, used for the FPS estimate.
    detector = FaceDetector()

    # Create the resizable window once instead of on every frame.
    cv2.namedWindow("Image", cv2.WINDOW_NORMAL)

    try:
        while True:
            success, img = cap.read()
            if not success:
                # Without this check a failed read would be passed on to
                # findFaces, which cannot convert a missing frame.
                print("Failed to read frame")
                break

            img, bboxs = detector.findFaces(img)
            print(bboxs)

            # Compute and display the frame rate, guarding against a zero
            # interval that would raise ZeroDivisionError.
            cTime = time.time()
            elapsed = cTime - pTime
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime
            cv2.putText(
                img, f"FPS: {int(fps)}", (20, 70), cv2.FONT_HERSHEY_PLAIN, 5, (0, 255, 0), 5
            )

            # Show the annotated frame; "q" quits.
            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always release the capture and close the window.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

Chapter 3 Face Detection/README.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# mp.solutions.face_detection
2+
3+
`mpFaceDetection = mp.solutions.face_detection` 初始化MediaPipe的人脸检测模块
4+
5+
`faceDetection = mpFaceDetection.FaceDetection(0.75)` 创建一个 MediaPipe 的人脸检测器对象,并设置检测置信度阈值为0.75
6+
7+
`results = faceDetection.process(imgRGB)` 处理图像,进行人脸检测
8+
9+
`mpFaceDetection.FaceDetection()` 的参数主要有:
10+
11+
`min_detection_confidence`:检测置信度阈值,取值范围为 [0.0, 1.0],默认值为 0.5;只有置信度不低于该阈值的检测结果才会被视为检测成功
12+
13+
# 边框信息
14+
15+
获取人脸检测的边界框信息:
16+
17+
```python
18+
bboxC = detection.location_data.relative_bounding_box
19+
ih, iw, ic = img.shape
20+
bbox = (
21+
int(bboxC.xmin * iw),
22+
int(bboxC.ymin * ih),
23+
int(bboxC.width * iw),
24+
int(bboxC.height * ih),
25+
)
26+
```
27+
28+
原版:
29+
30+
![原版](./pics/原版.png)
31+
32+
自定义:
33+
34+
![自定义](./pics/自定义.png)
35+
36+
矩形框看起来不完整且不断变化,可能是因为:
37+
38+
- 视频帧更新:每帧都会重新绘制,可能导致视觉上有闪烁或变化
39+
40+
- 检测结果不稳定:人脸检测结果在不同帧之间不稳定,边框可能会跳动
41+
42+
# 源代码的一个小问题
43+
44+
如果你去网站上看代码,会发现一个错误:
45+
46+
![website](./pics/website.png)
47+
48+
你直接复制代码到vscode里面会出错:
49+
50+
![vscode](./pics/vscode.png)
51+
52+
翻了一下视频,才发现:
53+
54+
![youtube](./pics/youtube.png)
55+
56+
原来这里有一个用于续行的反斜杠(`\`),但在网站上没有显示出来。这一处本应是一个用逗号连接的元组;缺少反斜杠后,如果直接把代码复制进 VS Code,编辑器只会把前两个值识别为一个元组,后两个值则被落在了下一行,于是报错。好在这个问题并不难发现。
16.4 MB
Binary file not shown.
10 MB
Binary file not shown.
23.5 MB
Binary file not shown.
41.2 MB
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1,https://www.pexels.com/zh-cn/video/3796263/
2+
2,https://www.pexels.com/zh-cn/video/4340125/
3+
3,https://www.pexels.com/zh-cn/video/5137640/
4+
4,https://www.pexels.com/zh-cn/video/4761954/
38.3 KB
Loading

0 commit comments

Comments
 (0)