@@ -191,8 +191,13 @@ def process_response(self, request, response, spider):
191191 self ._restore_original_delay (request )
192192
193193 if self ._is_no_available_proxies (response ) or self ._is_auth_error (response ):
194- self ._set_custom_delay (request , next (self .exp_backoff ))
194+ if self ._is_no_available_proxies (response ):
195+ reason = 'noslaves'
196+ else :
197+ reason = 'autherror'
198+ self ._set_custom_delay (request , next (self .exp_backoff ), reason = reason )
195199 else :
200+ self .crawler .stats .inc_value ('crawlera/delay/reset_backoff' )
196201 self .exp_backoff = exp_backoff (self .backoff_step , self .backoff_max )
197202
198203 if self ._is_auth_error (response ):
@@ -202,6 +207,7 @@ def process_response(self, request, response, spider):
202207 if retries < self .max_auth_retry_times :
203208 return self ._retry_auth (response , request , spider )
204209 else :
210+ self .crawler .stats .inc_value ('crawlera/retries/auth/max_reached' )
205211 logging .warning (
206212 "Max retries for authentication issues reached, please check auth"
207213 " information settings" ,
@@ -215,7 +221,7 @@ def process_response(self, request, response, spider):
215221 else :
216222 after = response .headers .get ('retry-after' )
217223 if after :
218- self ._set_custom_delay (request , float (after ))
224+ self ._set_custom_delay (request , float (after ), reason = 'banned' )
219225 self .crawler .stats .inc_value ('crawlera/response/banned' )
220226 else :
221227 self ._bans [key ] = 0
@@ -235,7 +241,7 @@ def process_exception(self, request, exception, spider):
235241 if isinstance (exception , (ConnectionRefusedError , ConnectionDone )):
236242 # Handle crawlera downtime
237243 self ._clear_dns_cache ()
238- self ._set_custom_delay (request , self .connection_refused_delay )
244+ self ._set_custom_delay (request , self .connection_refused_delay , reason = 'conn_refused' )
239245
240246 def _handle_not_enabled_response (self , request , response ):
241247 if self ._should_enable_for_response (response ):
@@ -244,6 +250,7 @@ def _handle_not_enabled_response(self, request, response):
244250
245251 retryreq = request .copy ()
246252 retryreq .dont_filter = True
253+ self .crawler .stats .inc_value ('crawlera/retries/should_have_been_enabled' )
247254 return retryreq
248255 return response
249256
@@ -256,6 +263,7 @@ def _retry_auth(self, response, request, spider):
256263 retryreq = request .copy ()
257264 retryreq .meta ['crawlera_auth_retry_times' ] = retries
258265 retryreq .dont_filter = True
266+ self .crawler .stats .inc_value ('crawlera/retries/auth' )
259267 return retryreq
260268
261269 def _clear_dns_cache (self ):
@@ -286,14 +294,17 @@ def _get_slot(self, request):
286294 key = self ._get_slot_key (request )
287295 return key , self .crawler .engine .downloader .slots .get (key )
288296
def _set_custom_delay(self, request, delay, reason=None):
    """Override the delay of the request's downloader slot.

    The slot is looked up through ``self._get_slot(request)``; when no
    slot is found nothing happens. Before the override, the slot's
    current delay is stored in ``self._saved_delays`` unless a value is
    already stored for that slot key.

    When ``reason`` is given, two stats are incremented: an occurrence
    counter ``crawlera/delay/<reason>`` and a running sum of the applied
    delays ``crawlera/delay/<reason>/total``.
    """
    slot_key, downloader_slot = self._get_slot(request)
    if downloader_slot:
        # Keep the first saved delay only, so repeated overrides do not
        # replace it with an already customised value.
        if self._saved_delays[slot_key] is None:
            self._saved_delays[slot_key] = downloader_slot.delay
        downloader_slot.delay = delay
        if reason is not None:
            stats = self.crawler.stats
            stats.inc_value('crawlera/delay/%s' % reason)
            stats.inc_value('crawlera/delay/%s/total' % reason, delay)
297308
298309 def _restore_original_delay (self , request ):
299310 """Restore original delay for slot if it was changed."""
0 commit comments