1+ import cv2
2+ import torch
3+ from PIL import Image
4+ from time import sleep
5+ import os
6+ import time
7+ import datetime
8+
9+
10+ import argparse
# Command-line options that tune which detections get cropped and saved.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-s', '--size',
    help='Minimum Pixel Size (how many pixels the crop detection needs to be)',
    default=100,
    type=int,
    required=False,
)
parser.add_argument(
    '-c', '--confidence',
    # argparse %-formats help strings, so a literal percent sign has to be
    # written as "%%"; a bare "%" makes "--help" fail with a ValueError.
    help='Confidence threshold (%%) on detections',
    default=0.7,
    type=float,
    required=False,
)
parser.add_argument(
    '-f', '--frequency',
    help='How frequently to capture cropped detected objects',
    default=1,
    type=int,
    required=False,
)

args = parser.parse_args()

# Change the working directory to the folder this script is in, so the
# relative output paths used further down resolve next to the script.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Running sum of the per-frame "person" counts (updated by infer()).
TOTAL_PEOPLE = 0
# Ratio between the camera frame size and the 640x640 inference image.
# 0.0 means "not measured yet"; infer() fills these in on the first frame.
SCALE_FACTOR_X = 0.0
SCALE_FACTOR_Y = 0.0

# Model: default YOLOv5-small from Torch Hub. force_reload=True refreshes the
# hub cache on every start.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', force_reload=True)

# Get webcam interface via opencv-python (device index 1) and request 1080p.
video = cv2.VideoCapture(1)
video.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
video.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

if not video.isOpened():
    print("Cannot open camera")
    # SystemExit works even where the interactive exit() helper is not
    # installed, and the non-zero status signals the failure to the caller.
    raise SystemExit(1)
51+
52+ # Infer via TorchHub and return the result
def infer(i):
    """Read one webcam frame, run the detector on it and annotate the result.

    Args:
        i: Iteration counter supplied by the main loop. Crops are saved and
            an annotated frame is returned only when ``i`` is a multiple of
            ``args.frequency``.

    Returns:
        The resized 640x640 frame with detections drawn on it, or ``None``
        when no frame could be read or this iteration is skipped because of
        the capture frequency.
    """
    global SCALE_FACTOR_X, SCALE_FACTOR_Y, TOTAL_PEOPLE

    print('[{}] New Inference Iteration'.format(datetime.datetime.now()))

    # Get the current image from the webcam; ret is False when the read fails.
    ret, img = video.read()
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        return None

    # Remember how the full-size frame maps onto the 640x640 inference image
    # so that boxes can later be projected back onto the original frame.
    height, width, channels = img.shape
    if SCALE_FACTOR_X == 0.0 and SCALE_FACTOR_Y == 0.0:
        SCALE_FACTOR_X = width / 640.0
        SCALE_FACTOR_Y = height / 640.0

    # Resize to a fixed 640x640 (this does NOT keep the aspect ratio) to
    # improve speed; keep the untouched frame for full-resolution crops.
    original_img = img
    img = cv2.resize(img, (640, 640))

    # Inference on a batch containing just this frame.
    results = model([img], size=640)
    results.print()

    # One row per detection with the columns:
    #   xmin ymin xmax ymax confidence class name
    process = results.pandas().xyxy[0]
    print(process)

    count = len(process[process['name'] == 'person'])
    if count > 0:
        print('# People: {}'.format(count))
        TOTAL_PEOPLE += count

    # Only save/draw on the frames that correspond with the frequency from
    # args. A frequency of 0 would otherwise raise ZeroDivisionError, so it
    # is treated as "every frame"; the sign of the value is irrelevant.
    frequency = abs(args.frequency) or 1
    if i % frequency != 0:
        return None

    # Save the crops before drawing over the image.
    save_cropped_images(original_img, process)

    return draw_over_image(img, process, count)
103+
104+ # maybe useful function in the future
def rescale_results(df):
    """Add box coordinates scaled by the module-level scale factors.

    The ``xmin``/``xmax`` columns are multiplied by ``SCALE_FACTOR_X`` and the
    ``ymin``/``ymax`` columns by ``SCALE_FACTOR_Y``; the results are stored in
    new ``scaledxmin``, ``scaledxmax``, ``scaledymin`` and ``scaledymax``
    columns.

    Args:
        df: DataFrame with ``xmin``, ``xmax``, ``ymin`` and ``ymax`` columns.

    Returns:
        The same DataFrame object, modified in place with the four new columns.
    """
    # Plain column arithmetic: DataFrame.apply() would forward an "inplace"
    # keyword to the callback and fail, and is slower than vectorised maths.
    df['scaledxmin'] = df['xmin'] * SCALE_FACTOR_X
    df['scaledxmax'] = df['xmax'] * SCALE_FACTOR_X
    df['scaledymin'] = df['ymin'] * SCALE_FACTOR_Y
    df['scaledymax'] = df['ymax'] * SCALE_FACTOR_Y

    return df
113+
114+
def save_cropped_images(img, df):
    """Write a JPEG crop of every sufficiently large, confident detection.

    Box coordinates in ``df`` are multiplied by ``SCALE_FACTOR_X`` /
    ``SCALE_FACTOR_Y`` before cropping ``img``. A detection is kept only when
    both sides of its scaled box exceed ``args.size`` pixels and its
    confidence exceeds ``args.confidence``.

    Args:
        img: Image array to crop from, indexed as ``img[y, x]``.
        df: DataFrame with ``xmin``, ``xmax``, ``ymin``, ``ymax``,
            ``confidence`` and ``name`` columns, one row per detection.

    Returns:
        None. Files are written to ``./runs/detect/miner`` and named
        ``<name>_<milliseconds since epoch>.jpg``.
    """
    output_dir = './runs/detect/miner'
    # cv2.imwrite does not create missing folders; make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    for _, row in df.iterrows():
        xmin = int(row['xmin'] * SCALE_FACTOR_X)
        xmax = int(row['xmax'] * SCALE_FACTOR_X)
        ymin = int(row['ymin'] * SCALE_FACTOR_Y)
        ymax = int(row['ymax'] * SCALE_FACTOR_Y)
        print(xmin, xmax, ymin, ymax)

        # Explicit check instead of assert: assertions are stripped under
        # "python -O", which would silently disable this filter.
        keep = (
            (xmax - xmin) > args.size
            and (ymax - ymin) > args.size
            and row['confidence'] > args.confidence
        )
        if not keep:
            continue  # too small or not confident enough

        # WATCH OUT! y goes first, then x.
        cropped_snip = img[ymin:ymax, xmin:xmax]

        out_path = '{}/{}_{}.jpg'.format(
            output_dir, row['name'], time.time_ns() // 1000000
        )
        # imwrite reports failure through its return value; surface it.
        if not cv2.imwrite(out_path, cropped_snip):
            print('Could not write cropped image to {}'.format(out_path))
151+
152+
153+
def draw_over_image(img, df, count):
    """Draw a labelled box per detection plus a summary overlay on ``img``.

    Boxes and labels are green for ``person`` detections and grey for
    everything else. The summary (people in this frame and the running
    ``TOTAL_PEOPLE`` value) is written near the top-left corner.

    Args:
        img: Image array to draw on.
        df: DataFrame with ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``name``
            columns, one row per detection, in the coordinates of ``img``.
        count: Number of people detected in this frame.

    Returns:
        The image with the drawings applied.
    """
    grey = (128, 128, 128)
    green = (0, 255, 0)

    # Stays grey when there are no detections; otherwise ends up as the
    # colour of the last detection, which is also used for the summary text.
    draw_color = grey
    for _, row in df.iterrows():
        draw_color = green if row['name'] == 'person' else grey

        left, top = int(row['xmin']), int(row['ymin'])
        right, bottom = int(row['xmax']), int(row['ymax'])

        img = cv2.rectangle(img=img, pt1=(left, top), pt2=(right, bottom),
                            color=draw_color, thickness=1)

        # Class label slightly above and to the left of the box corner.
        cv2.putText(img, row['name'], (left - 10, top - 10),
                    cv2.FONT_HERSHEY_PLAIN, 1, draw_color)

    # cv2.putText cannot render line breaks, so each summary line is drawn
    # with its own call instead of embedding a newline in one string.
    cv2.putText(img, 'People: {}'.format(count), (25, 25),
                cv2.FONT_HERSHEY_PLAIN, 1.2, draw_color)
    cv2.putText(img, 'Total Detected Objects: {}'.format(TOTAL_PEOPLE), (25, 45),
                cv2.FONT_HERSHEY_PLAIN, 1.2, draw_color)

    return img
187+
# Main loop; infers sequentially until you press "q"
i = 1  # iteration counter used to act on 1 of every N frames (see args.frequency)
try:
    while True:
        # Capture start time to calculate fps
        start = time.time()

        # Get a prediction: an annotated frame, or None when there is
        # nothing to show for this iteration.
        image = infer(i)

        # Advance the counter on every pass, including skipped ones;
        # otherwise a frequency above 1 would never reach a matching frame.
        i += 1

        # Compare against None explicitly: the truth value of an image array
        # is ambiguous and raises ValueError.
        if image is None:
            continue  # skip iteration

        cv2.imshow('Predicted', image)
        if cv2.waitKey(1) == ord('q'):
            break

        # Print frames per second
        elapsed = time.time() - start
        if elapsed > 0:
            print('{} fps'.format(1 / elapsed))
finally:
    # Release resources when finished, also on Ctrl+C or an unexpected error.
    cv2.destroyAllWindows()
    video.release()
0 commit comments