Skip to content

Commit 4b10d17

Browse files
committed
feat: added images, article v1
1 parent 86e9c58 commit 4b10d17

28 files changed

+388
-190
lines changed

custom_pytorch_yolov5/custom_pytorch.md

Lines changed: 79 additions & 190 deletions
Large diffs are not rendered by default.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import torch
2+
from PIL import ImageGrab
3+
import argparse
4+
import time
5+
import cv2
6+
import numpy as np
7+
8+
# Parse command-line arguments selecting the model weights and the capture mode.
parser = argparse.ArgumentParser()
parser.add_argument('-m', '--model', help='Model path',
                    type=str,
                    required=True)
# The help text lists exactly the values accepted by `choices`
# (it used to advertise "screen", which argparse rejects).
parser.add_argument('-d', '--detect', help='Detection mode (league / screenshot)',
                    choices=['league', 'screenshot'],
                    default='screenshot',
                    type=str,
                    required=False)

args = parser.parse_args()


# Model: load the custom YOLOv5 weights given on the command line via PyTorch Hub.
model = torch.hub.load('ultralytics/yolov5',
                       'custom',
                       path=args.model,
                       force_reload=False)
28+
29+
30+
def draw_over_image(img, df):
    """Draw a labelled bounding box on ``img`` for every detection in ``df``.

    Args:
        img: Image array handed to OpenCV's drawing functions. The main loop
            of this script passes a BGR frame, so the colours below are
            written as BGR tuples.
        df: Detections table with ``xmin``, ``ymin``, ``xmax``, ``ymax`` and
            ``name`` columns, one row per detection.

    Returns:
        The annotated image; ``img`` itself when ``df`` has no rows.
    """
    # OpenCV colour tuples are ordered (blue, green, red).
    green = (0, 255, 0)
    yellow = (0, 128, 128)
    red = (0, 0, 255)
    # 'mask' -> green, 'incorrect' -> yellow, anything else -> red.
    class_colors = {'mask': green, 'incorrect': yellow}

    for _, row in df.iterrows():
        draw_color = class_colors.get(row['name'], red)
        top_left = (int(row['xmin']), int(row['ymin']))
        bottom_right = (int(row['xmax']), int(row['ymax']))

        img = cv2.rectangle(img=img, pt1=top_left, pt2=bottom_right,
                            color=draw_color,
                            thickness=5)

        # Write the class name once, slightly above and left of the box corner.
        cv2.putText(img, row['name'], (top_left[0] - 10, top_left[1] - 10),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                    color=draw_color, thickness=2)

    return img
57+
58+
# Main loop; infers sequentially until you press "q"
while True:
    # Capture start time first so the fps figure covers grab, inference and display.
    start = time.time()

    # Image
    if args.detect == 'league':
        # Fixed screen region: the (2140, 1030, 2560, 1440) box shrunk by 100 px on each side.
        im = ImageGrab.grab(bbox=(2140+100, 1030+100, 2560-100, 1440-100))
    else:
        im = ImageGrab.grab()  # take a screenshot

    # Keep the PIL channel order for the model: YOLOv5 expects RGB arrays.
    # NOTE(review): assumes ImageGrab returns an RGB image on this platform - confirm.
    rgb_frame = np.array(im)

    # Inference
    results = model(rgb_frame)
    detections = results.pandas().xyxy[0]  # computed once, reused for print and drawing
    print(detections)

    # OpenCV draws and displays in BGR, so swap channels only for the preview.
    img = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    cv2.imshow('Image', draw_over_image(img, detections))
    key = cv2.waitKey(30)
    if key == ord('q'):
        cv2.destroyAllWindows()
        break

    # Print frames per second
    print('{} fps'.format(1/(time.time()-start)))
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
import cv2
2+
import torch
3+
from PIL import Image
4+
from time import sleep
5+
import os
6+
import time
7+
import datetime
8+
9+
10+
import argparse
11+
# parse arguments for different execution modes.
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--size', help='Minimum Pixel Size (how many pixels the crop detection needs to be)',
                    default=100,
                    type=int,
                    required=False)
# argparse %-formats help strings, so a bare "%" makes --help fail; the
# threshold is compared against a 0-1 confidence anyway, not a percentage.
parser.add_argument('-c', '--confidence', help='Confidence threshold (0-1) on detections',
                    default=0.7,
                    type=float,
                    required=False)
parser.add_argument('-f', '--frequency', help='How frequently to capture cropped detected objects',
                    default=1,
                    type=int,
                    required=False)


args = parser.parse_args()

# The frequency is used as a modulo divisor, so it must be a positive integer.
if args.frequency < 1:
    parser.error('--frequency must be a positive integer')

# Change the working directory to the folder this script is in.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Running total of 'person' detections across all processed frames.
TOTAL_PEOPLE = 0
# Ratios between the camera frame size and the 640x640 model input;
# 0.0 means "not computed yet" (filled in on the first frame).
SCALE_FACTOR_X = 0.0
SCALE_FACTOR_Y = 0.0

# Model
model = torch.hub.load('ultralytics/yolov5',
                       'yolov5s',
                       force_reload=True)  # default yolov5.

# Get webcam interface via opencv-python
video = cv2.VideoCapture(1)
video.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
video.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

if not video.isOpened():
    print("Cannot open camera")
    # Exit with a failure status; the bare exit() helper is only guaranteed
    # to exist when the `site` module is loaded.
    raise SystemExit(1)
51+
52+
# Infer via TorchHub and return the result
53+
def infer(i):
    """Grab one webcam frame, run YOLOv5 on it and return the annotated frame.

    Args:
        i: Iteration counter. Cropped detections are saved, and a frame is
            returned, only when ``i`` is a multiple of ``args.frequency``.

    Returns:
        The 640x640 frame with detections drawn on it, or ``None`` when the
        camera yields no frame or ``i`` is not a capture iteration.
    """
    global SCALE_FACTOR_X, SCALE_FACTOR_Y, TOTAL_PEOPLE

    print('[{}] New Inference Iteration'.format(datetime.datetime.now()))

    # Get the current image from the webcam
    ret, img = video.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        return None

    # Remember how to map 640x640 detection coordinates back onto the
    # full-size frame; computed once, on the first frame.
    height, width, _ = img.shape
    if SCALE_FACTOR_X == 0.0 and SCALE_FACTOR_Y == 0.0:
        SCALE_FACTOR_X = width / 640.0
        SCALE_FACTOR_Y = height / 640.0
    original_img = img
    # Resize to the model input size to improve speed. The aspect ratio is
    # NOT preserved, which is why two separate scale factors are kept.
    img = cv2.resize(img, (640, 640))

    # Inference. OpenCV frames are BGR while YOLOv5 expects RGB arrays, so
    # convert a copy for the model and keep the BGR frame for drawing.
    results = model([cv2.cvtColor(img, cv2.COLOR_BGR2RGB)], size=640)  # batch of images

    # Results
    results.print()

    process = results.pandas().xyxy[0]  # .sort_values('confidence')
    #      xmin    ymin    xmax   ymax  confidence  class    name
    # 0  749.50   43.50  1148.0  704.5    0.874023      0  person
    # 1  433.50  433.50   517.5  714.5    0.687988     27     tie

    print(process)
    count = len(process[process['name'] == 'person'])
    if count > 0:
        print('# People: {}'.format(count))
        TOTAL_PEOPLE += count

    # Frames that are off the capture frequency are neither saved nor drawn.
    if (i % args.frequency) != 0:
        return None

    save_cropped_images(original_img, process)  # save images before drawing over them.
    return draw_over_image(img, process, count)
103+
104+
# maybe useful function in the future
105+
def rescale_results(df):
    """Add detection coordinates scaled by the module-level scale factors.

    Args:
        df: Detections table with ``xmin``, ``xmax``, ``ymin`` and ``ymax``
            columns.

    Returns:
        The same ``df`` object, with ``scaledxmin``, ``scaledxmax``,
        ``scaledymin`` and ``scaledymax`` columns added in place.
    """
    # Plain column arithmetic replaces df.apply(..., inplace=True):
    # apply() has no such option and forwards it to the lambda, which raised
    # TypeError (and the default axis applied the lambda per column anyway).
    df['scaledxmin'] = df['xmin'] * SCALE_FACTOR_X
    df['scaledxmax'] = df['xmax'] * SCALE_FACTOR_X
    df['scaledymin'] = df['ymin'] * SCALE_FACTOR_Y
    df['scaledymax'] = df['ymax'] * SCALE_FACTOR_Y

    return df
113+
114+
115+
def save_cropped_images(img, df):
    """Save a JPEG crop of ``img`` for every large, confident detection.

    Detection coordinates are scaled with ``SCALE_FACTOR_X`` and
    ``SCALE_FACTOR_Y`` before cropping. A detection is skipped unless both
    sides of its box exceed ``args.size`` pixels and its confidence exceeds
    ``args.confidence``.

    Args:
        img: Frame to crop from, indexed as ``img[y, x]``.
        df: Detections table with ``xmin``, ``xmax``, ``ymin``, ``ymax``,
            ``confidence`` and ``name`` columns.
    """
    output_dir = './runs/detect/miner'

    for _, row in df.iterrows():
        # Clamp the start corner at 0: a negative index would wrap around
        # when slicing the frame.
        xmin = max(int(row['xmin'] * SCALE_FACTOR_X), 0)
        xmax = int(row['xmax'] * SCALE_FACTOR_X)
        ymin = max(int(row['ymin'] * SCALE_FACTOR_Y), 0)
        ymax = int(row['ymax'] * SCALE_FACTOR_Y)
        print(xmin, xmax, ymin, ymax)

        # Plain condition instead of assert/except: asserts are stripped
        # under `python -O`, which would disable the filtering altogether.
        big_enough = (xmax - xmin) > args.size and (ymax - ymin) > args.size
        if not (big_enough and row['confidence'] > args.confidence):
            continue  # skip this detection: too small or not confident enough.

        # WATCH OUT! y goes first, then x.
        cropped_snip = img[ymin:ymax, xmin:xmax]  # region of the frame we are interested in

        # cv2.imwrite does not create missing folders, so make sure the
        # target directory exists before writing.
        os.makedirs(output_dir, exist_ok=True)
        out_path = '{}/{}_{}.jpg'.format(output_dir, row['name'],
                                         time.time_ns() // 1000000)
        if not cv2.imwrite(out_path, cropped_snip):
            print('Could not write {}'.format(out_path))
151+
152+
153+
154+
def draw_over_image(img, df, count):
    """Draw detection boxes, class labels and a people counter on ``img``.

    Args:
        img: Image array handed to OpenCV's drawing functions.
        df: Detections table with ``xmin``, ``ymin``, ``xmax``, ``ymax`` and
            ``name`` columns, one row per detection.
        count: Number of people detected in the current frame.

    Returns:
        The annotated image.
    """
    person_color = (0, 255, 0)  # green
    other_color = (128, 128, 128)  # grey

    for _, row in df.iterrows():
        draw_color = person_color if row['name'] == 'person' else other_color

        img = cv2.rectangle(img=img, pt1=(int(row['xmin']), int(row['ymin'])),
                            pt2=(int(row['xmax']), int(row['ymax'])),
                            color=draw_color,
                            thickness=1)

        # Write the class name once, slightly above and left of the box corner.
        cv2.putText(img, row['name'], (int(row['xmin'])-10, int(row['ymin'])-10),
                    cv2.FONT_HERSHEY_PLAIN, 1,
                    draw_color)

    # putText cannot render '\n', so the two counters are drawn as separate
    # lines, in a fixed colour rather than that of the last detection.
    cv2.putText(img, 'People: {}'.format(count), (25, 25),
                cv2.FONT_HERSHEY_PLAIN, 1.2,
                other_color)
    cv2.putText(img, 'Total Detected Objects: {}'.format(TOTAL_PEOPLE), (25, 45),
                cv2.FONT_HERSHEY_PLAIN, 1.2,
                other_color)

    return img
187+
188+
# Main loop; infers sequentially until you press "q"
i = 1  # frame counter used to act on 1 of every N frames (see args.frequency)
try:
    while True:
        # Capture start time to calculate fps
        start = time.time()

        # Get a prediction: infer() returns a single annotated frame, or
        # None when there is nothing to show for this iteration.
        image = infer(i)

        # Advance the counter on every iteration, skipped ones included;
        # otherwise a skipped frame would retry the same index forever.
        i += 1

        # `is None` rather than truthiness: a frame is an array, whose
        # truth value is ambiguous.
        if image is None:
            continue  # skip iteration

        cv2.imshow('Predicted', image)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

        # Print frames per second
        print('{} fps'.format(1/(time.time()-start)))
finally:
    # Release resources when finished, even if the loop raised.
    cv2.destroyAllWindows()
    video.release()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
numpy==1.24.1
2+
opencv_python==4.7.0.68
3+
Pillow==9.4.0
4+
torch==1.11.0+cu113
29 KB
Loading
374 KB
Loading
9.73 KB
Loading
22.8 KB
Loading
43.3 KB
Loading
2.58 MB
Loading

0 commit comments

Comments
 (0)