Skip to content

Commit 677b936

Browse files
authored
Work as expected with a warning on request meta copy (#116)
1 parent e43f5a2 commit 677b936

File tree

2 files changed

+168
-34
lines changed

2 files changed

+168
-34
lines changed

scrapy_zyte_smartproxy/middleware.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
import warnings
44
from base64 import urlsafe_b64decode
55
from collections import defaultdict
6+
try:
7+
from urllib.request import _parse_proxy
8+
except ImportError:
9+
from urllib2 import _parse_proxy
610

711
from six.moves.urllib.parse import urlparse, urlunparse
812
from w3lib.http import basic_auth_header
@@ -17,6 +21,11 @@
1721
logger = logging.getLogger(__name__)
1822

1923

24+
def _remove_auth(auth_proxy_url):
25+
proxy_type, user, password, hostport = _parse_proxy(auth_proxy_url)
26+
return urlunparse((proxy_type, hostport, "", "", "", ""))
27+
28+
2029
class ZyteSmartProxyMiddleware(object):
2130

2231
url = 'http://proxy.zyte.com:8011'
@@ -108,6 +117,7 @@ def open_spider(self, spider):
108117
return
109118

110119
self._auth_url = self._make_auth_url(spider)
120+
self._authless_url = _remove_auth(self._auth_url)
111121

112122
logger.info(
113123
"Using Zyte Smart Proxy Manager at %s (apikey: %s)" % (
@@ -214,6 +224,19 @@ def process_request(self, request, spider):
214224
if self._is_enabled_for_request(request):
215225
if 'proxy' not in request.meta:
216226
request.meta['proxy'] = self._auth_url
227+
elif (
228+
request.meta['proxy'] == self._authless_url
229+
and b"Proxy-Authorization" not in request.headers
230+
):
231+
logger.warning(
232+
"The value of the 'proxy' meta key of request {request} "
233+
"has no API key. You seem to have copied the value of "
234+
"the 'proxy' request meta key from a response or from a "
235+
"different request. Copying request meta keys set by "
236+
"middlewares from one request to another is a bad "
237+
"practice that can cause issues.".format(request=request)
238+
)
239+
request.meta['proxy'] = self._auth_url
217240
targets_zyte_api = self._targets_zyte_api(request)
218241
self._set_zyte_smartproxy_default_headers(request)
219242
request.meta['download_timeout'] = self.download_timeout

0 commit comments

Comments
 (0)