|
3 | 3 | import warnings |
4 | 4 | from base64 import urlsafe_b64decode |
5 | 5 | from collections import defaultdict |
| 6 | +try: |
| 7 | + from urllib.request import _parse_proxy |
| 8 | +except ImportError: |
| 9 | + from urllib2 import _parse_proxy |
6 | 10 |
|
7 | 11 | from six.moves.urllib.parse import urlparse, urlunparse |
8 | 12 | from w3lib.http import basic_auth_header |
|
17 | 21 | logger = logging.getLogger(__name__) |
18 | 22 |
|
19 | 23 |
|
| 24 | +def _remove_auth(auth_proxy_url): |
| 25 | + proxy_type, user, password, hostport = _parse_proxy(auth_proxy_url) |
| 26 | + return urlunparse((proxy_type, hostport, "", "", "", "")) |
| 27 | + |
| 28 | + |
20 | 29 | class ZyteSmartProxyMiddleware(object): |
21 | 30 |
|
22 | 31 | url = 'http://proxy.zyte.com:8011' |
@@ -108,6 +117,7 @@ def open_spider(self, spider): |
108 | 117 | return |
109 | 118 |
|
110 | 119 | self._auth_url = self._make_auth_url(spider) |
| 120 | + self._authless_url = _remove_auth(self._auth_url) |
111 | 121 |
|
112 | 122 | logger.info( |
113 | 123 | "Using Zyte Smart Proxy Manager at %s (apikey: %s)" % ( |
@@ -214,6 +224,19 @@ def process_request(self, request, spider): |
214 | 224 | if self._is_enabled_for_request(request): |
215 | 225 | if 'proxy' not in request.meta: |
216 | 226 | request.meta['proxy'] = self._auth_url |
| 227 | + elif ( |
| 228 | + request.meta['proxy'] == self._authless_url |
| 229 | + and b"Proxy-Authorization" not in request.headers |
| 230 | + ): |
| 231 | + logger.warning( |
| 232 | + "The value of the 'proxy' meta key of request {request} " |
| 233 | + "has no API key. You seem to have copied the value of " |
| 234 | + "the 'proxy' request meta key from a response or from a " |
| 235 | + "different request. Copying request meta keys set by " |
| 236 | + "middlewares from one request to another is a bad " |
| 237 | + "practice that can cause issues.".format(request=request) |
| 238 | + ) |
| 239 | + request.meta['proxy'] = self._auth_url |
217 | 240 | targets_zyte_api = self._targets_zyte_api(request) |
218 | 241 | self._set_zyte_smartproxy_default_headers(request) |
219 | 242 | request.meta['download_timeout'] = self.download_timeout |
|
0 commit comments