@@ -35,6 +35,13 @@ class CrawleraMiddlewareTestCase(TestCase):
3535 def setUp (self ):
3636 self .spider = Spider ('foo' )
3737 self .settings = {'CRAWLERA_APIKEY' : 'apikey' }
38+ Response_init_orig = Response .__init__
39+
40+ def Response_init_new (self , * args , ** kwargs ):
41+ assert not kwargs .get ('request' ), 'response objects at this stage shall not be pinned'
42+ return Response_init_orig (self , * args , ** kwargs )
43+
44+ Response .__init__ = Response_init_new
3845
3946 def _mock_crawlera_response (self , url , headers = None , ** kwargs ):
4047 crawlera_version = choice (("1.36.3-cd5e44" , "" , None ))
@@ -70,9 +77,9 @@ def _assert_disabled(self, spider, settings=None):
7077 self .assertEqual (req .meta .get ('proxy' ), None )
7178 self .assertEqual (req .meta .get ('download_timeout' ), None )
7279 self .assertEqual (req .headers .get ('Proxy-Authorization' ), None )
73- res = Response (req .url , request = req )
80+ res = Response (req .url )
7481 assert mw .process_response (req , res , spider ) is res
75- res = Response (req .url , status = mw .ban_code , request = req )
82+ res = Response (req .url , status = mw .ban_code )
7683 assert mw .process_response (req , res , spider ) is res
7784
7885 def _assert_enabled (self , spider ,
@@ -89,7 +96,7 @@ def _assert_enabled(self, spider,
8996 self .assertEqual (req .meta .get ('proxy' ), proxyurl )
9097 self .assertEqual (req .meta .get ('download_timeout' ), download_timeout )
9198 self .assertEqual (req .headers .get ('Proxy-Authorization' ), proxyauth )
92- res = self ._mock_crawlera_response (req .url , request = req )
99+ res = self ._mock_crawlera_response (req .url )
93100 assert mw .process_response (req , res , spider ) is res
94101
95102 # disabled if 'dont_proxy=True' is set
@@ -99,7 +106,7 @@ def _assert_enabled(self, spider,
99106 self .assertEqual (req .meta .get ('proxy' ), None )
100107 self .assertEqual (req .meta .get ('download_timeout' ), None )
101108 self .assertEqual (req .headers .get ('Proxy-Authorization' ), None )
102- res = self ._mock_crawlera_response (req .url , request = req )
109+ res = self ._mock_crawlera_response (req .url )
103110 assert mw .process_response (req , res , spider ) is res
104111 del req .meta ['dont_proxy' ]
105112
@@ -270,7 +277,6 @@ def test_delay_adjustment(self):
270277 ban_url ,
271278 status = self .bancode ,
272279 headers = headers ,
273- request = req
274280 )
275281 mw .process_response (req , res , self .spider )
276282 self .assertEqual (slot .delay , delay )
@@ -286,7 +292,6 @@ def test_delay_adjustment(self):
286292 ban_url ,
287293 status = self .bancode ,
288294 headers = headers ,
289- request = req
290295 )
291296 mw .process_response (req , res , self .spider )
292297 self .assertEqual (slot .delay , retry_after )
@@ -295,7 +300,7 @@ def test_delay_adjustment(self):
295300 # DNS cache should be cleared in case of errors
296301 dnscache ['proxy.crawlera.com' ] = '1.1.1.1'
297302
298- res = self ._mock_crawlera_response (url , request = req )
303+ res = self ._mock_crawlera_response (url )
299304 mw .process_response (req , res , self .spider )
300305 self .assertEqual (slot .delay , delay )
301306 self .assertEqual (self .spider .download_delay , delay )
@@ -308,7 +313,7 @@ def test_delay_adjustment(self):
308313 self .assertNotIn ('proxy.crawlera.com' , dnscache )
309314
310315 dnscache ['proxy.crawlera.com' ] = '1.1.1.1'
311- res = self ._mock_crawlera_response (ban_url , request = req )
316+ res = self ._mock_crawlera_response (ban_url )
312317 mw .process_response (req , res , self .spider )
313318 self .assertEqual (slot .delay , delay )
314319 self .assertEqual (self .spider .download_delay , delay )
@@ -320,7 +325,7 @@ def test_delay_adjustment(self):
320325 self .assertNotIn ('proxy.crawlera.com' , dnscache )
321326
322327 dnscache ['proxy.crawlera.com' ] = '1.1.1.1'
323- res = self ._mock_crawlera_response (ban_url , status = self .bancode , request = req )
328+ res = self ._mock_crawlera_response (ban_url , status = self .bancode )
324329 mw .process_response (req , res , self .spider )
325330 self .assertEqual (slot .delay , delay )
326331 self .assertEqual (self .spider .download_delay , delay )
@@ -562,7 +567,6 @@ def test_noslaves_delays(self, random_uniform_patch):
562567 ban_url ,
563568 status = self .bancode ,
564569 headers = headers ,
565- request = noslaves_req
566570 )
567571
568572 # delays grow exponentially
@@ -585,7 +589,6 @@ def test_noslaves_delays(self, random_uniform_patch):
585589 ban_url ,
586590 status = self .bancode ,
587591 headers = ban_headers ,
588- request = ban_req
589592 )
590593 mw .process_response (ban_req , ban_res , self .spider )
591594 self .assertEqual (slot .delay , default_delay )
@@ -597,7 +600,6 @@ def test_noslaves_delays(self, random_uniform_patch):
597600 good_res = self ._mock_crawlera_response (
598601 url ,
599602 status = 200 ,
600- request = good_req
601603 )
602604 mw .process_response (good_req , good_res , self .spider )
603605 self .assertEqual (slot .delay , default_delay )
@@ -631,7 +633,6 @@ def test_auth_error_retries(self, random_uniform_patch):
631633 auth_error_response = self ._mock_crawlera_response (
632634 ban_url ,
633635 status = self .auth_error_code ,
634- request = auth_error_req ,
635636 headers = auth_error_headers
636637 )
637638
@@ -641,34 +642,33 @@ def test_auth_error_retries(self, random_uniform_patch):
641642 retry_times = req .meta ["crawlera_auth_retry_times" ]
642643 self .assertEqual (retry_times , 1 )
643644
644- auth_error_response .meta ["crawlera_auth_retry_times" ] = retry_times
645+ auth_error_req .meta ["crawlera_auth_retry_times" ] = retry_times
645646 req = mw .process_response (auth_error_req , auth_error_response , self .spider )
646647 self .assertEqual (slot .delay , backoff_step * 2 ** 1 )
647648 retry_times = req .meta ["crawlera_auth_retry_times" ]
648649 self .assertEqual (retry_times , 2 )
649650
650- auth_error_response .meta ["crawlera_auth_retry_times" ] = retry_times
651+ auth_error_req .meta ["crawlera_auth_retry_times" ] = retry_times
651652 req = mw .process_response (auth_error_req , auth_error_response , self .spider )
652653 self .assertEqual (slot .delay , backoff_step * 2 ** 2 )
653654 retry_times = req .meta ["crawlera_auth_retry_times" ]
654655 self .assertEqual (retry_times , 3 )
655656
656- auth_error_response .meta ["crawlera_auth_retry_times" ] = retry_times
657+ auth_error_req .meta ["crawlera_auth_retry_times" ] = retry_times
657658 req = mw .process_response (auth_error_req , auth_error_response , self .spider )
658659 self .assertEqual (slot .delay , max_delay )
659660 retry_times = req .meta ["crawlera_auth_retry_times" ]
660661 self .assertEqual (retry_times , 4 )
661662
662663 # Should return a response when after max number of retries
663- auth_error_response .meta ["crawlera_auth_retry_times" ] = retry_times
664+ auth_error_req .meta ["crawlera_auth_retry_times" ] = retry_times
664665 res = mw .process_response (auth_error_req , auth_error_response , self .spider )
665666 self .assertIsInstance (res , Response )
666667
667668 # non crawlera 407 is not retried
668669 non_crawlera_407_response = self ._mock_crawlera_response (
669670 ban_url ,
670671 status = self .auth_error_code ,
671- request = auth_error_req ,
672672 )
673673 res = mw .process_response (auth_error_req , non_crawlera_407_response , self .spider )
674674 self .assertIsInstance (res , Response )
@@ -706,7 +706,7 @@ def test_process_response_enables_crawlera(self):
706706
707707 # A good code response should not enable it
708708 req = Request (url )
709- res = Response (url , status = 200 , request = req )
709+ res = Response (url , status = 200 )
710710 mw .process_request (req , self .spider )
711711 out = mw .process_response (req , res , self .spider )
712712 self .assertIsInstance (out , Response )
@@ -715,7 +715,7 @@ def test_process_response_enables_crawlera(self):
715715 self .assertEqual (mw .crawler .stats .get_stats (), {})
716716
717717 # A bad code response should enable it
718- res = Response (url , status = 403 , request = req )
718+ res = Response (url , status = 403 )
719719 mw .process_request (req , self .spider )
720720 out = mw .process_response (req , res , self .spider )
721721 self .assertIsInstance (out , Request )
@@ -727,7 +727,7 @@ def test_process_response_enables_crawlera(self):
727727
728728 # Another regular response with bad code should be done on crawlera
729729 # and not be retried
730- res = Response (url , status = 403 , request = req )
730+ res = Response (url , status = 403 )
731731 mw .process_request (req , self .spider )
732732 out = mw .process_response (req , res , self .spider )
733733 self .assertIsInstance (out , Response )
@@ -737,7 +737,7 @@ def test_process_response_enables_crawlera(self):
737737
738738 # A crawlera response with bad code should not be retried as well
739739 mw .process_request (req , self .spider )
740- res = self ._mock_crawlera_response (url , status = 403 , request = req )
740+ res = self ._mock_crawlera_response (url , status = 403 )
741741 out = mw .process_response (req , res , self .spider )
742742 self .assertIsInstance (out , Response )
743743 self .assertEqual (mw .enabled , False )
@@ -756,7 +756,7 @@ def test_process_response_from_file_scheme(self):
756756
757757 # A good code response should not enable it
758758 req = Request (url )
759- res = Response (url , status = 200 , request = req )
759+ res = Response (url , status = 200 )
760760 mw .process_request (req , self .spider )
761761 out = mw .process_response (req , res , self .spider )
762762 self .assertIsInstance (out , Response )
@@ -872,7 +872,6 @@ def test_no_slot(self):
872872 ban_url ,
873873 status = self .bancode ,
874874 headers = headers ,
875- request = noslaves_req
876875 )
877876 # checking that response was processed
878877 response = mw .process_response (noslaves_req , noslaves_res , self .spider )
0 commit comments