Retry with same proxy after 404

arunkannawadi · arunkannawadi · commit 630d12ce61c1 · 2023-01-16T16:45:37.000-05:00
diff --git a/scholarly/_navigator.py b/scholarly/_navigator.py
@@ -119,6 +119,14 @@ def _get_page(self, pagerequest: str, premium: bool = False) -> str:
 
                 if resp.status_code == 200 and not has_captcha:
                     return resp.text
+                elif resp.status_code == 404:
+                    # If the scholar_id is only approximate, the first request
+                    # returns 404 (or 302) before being redirected to the correct
+                    # profile. In such cases, retry with the same session.
+                    # See https://github.com/scholarly-python-package/scholarly/issues/469.
+                    self.logger.debug("Got a 404 error. Attempting with same proxy")
+                    tries += 1
+                    continue
                 elif has_captcha:
                     self.logger.info("Got a captcha request.")
                     session = pm._handle_captcha2(pagerequest)
diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
@@ -451,7 +451,7 @@ def _handle_captcha2(self, url):
         return self._session
 
     def _new_session(self, **kwargs):
-        init_kwargs = {}
+        init_kwargs = {"follow_redirects": True}
         init_kwargs.update(kwargs)
         proxies = {}
         if self._session:
@@ -610,7 +610,7 @@ def ScraperAPI(self, API_KEY, country_code=None, premium=False, render=False):
         # https://www.scraperapi.com/documentation/
         self._TIMEOUT = 60
 
-        prefix = "http://scraperapi"
+        prefix = "http://scraperapi.retry_404=true"
         if country_code is not None:
             prefix += ".country_code=" + country_code
         if premium: