Skip to content

Commit 630d12c

Browse files
committed
Retry with same proxy after 404
1 parent 1104210 commit 630d12c

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

scholarly/_navigator.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,14 @@ def _get_page(self, pagerequest: str, premium: bool = False) -> str:
119119

120120
if resp.status_code == 200 and not has_captcha:
121121
return resp.text
122+
elif resp.status_code == 404:
123+
# If the scholar_id was approximate, it first appears as
124+
# 404 (or 302), and then gets redirected to the correct profile.
125+
# In such cases, we need to try again with the same session.
126+
# See https://github.com/scholarly-python-package/scholarly/issues/469.
127+
self.logger.debug("Got a 404 error. Attempting with same proxy")
128+
tries += 1
129+
continue
122130
elif has_captcha:
123131
self.logger.info("Got a captcha request.")
124132
session = pm._handle_captcha2(pagerequest)

scholarly/_proxy_generator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ def _handle_captcha2(self, url):
451451
return self._session
452452

453453
def _new_session(self, **kwargs):
454-
init_kwargs = {}
454+
init_kwargs = {"follow_redirects": True}
455455
init_kwargs.update(kwargs)
456456
proxies = {}
457457
if self._session:
@@ -610,7 +610,7 @@ def ScraperAPI(self, API_KEY, country_code=None, premium=False, render=False):
610610
# https://www.scraperapi.com/documentation/
611611
self._TIMEOUT = 60
612612

613-
prefix = "http://scraperapi"
613+
prefix = "http://scraperapi.retry_404=true"
614614
if country_code is not None:
615615
prefix += ".country_code=" + country_code
616616
if premium:

0 commit comments

Comments
 (0)