Skip to content

Commit abd56b5

Browse files
committed
Clean up changes from original commits
1 parent b548d47 commit abd56b5

File tree

1 file changed

+26
-12
lines changed

1 file changed

+26
-12
lines changed

scholarly/_scholarly.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@
66
import csv
77
import pprint
88
import datetime
9-
import itertools
10-
import warnings
119
from typing import Dict, List
10+
import re
1211
from ._navigator import Navigator
1312
from ._proxy_generator import ProxyGenerator
1413
from dotenv import find_dotenv, load_dotenv
@@ -284,23 +283,38 @@ def citedby(self, object: Publication)->_SearchScholarIterator:
284283
self.logger.warning("Object not supported for bibtex exportation")
285284
return
286285

287-
if object["bib"]["citedby"] < 999:
286+
if object["num_citations"] <= 1000:
288287
return PublicationParser(self.__nav).citedby(object)
288+
289+
self.logger.debug("Since the paper titled %s has %d citations (>1000), "
290+
"fetching it on an annual basis.", object["bib"]["title"], object["num_citations"])
291+
292+
year_end = int(datetime.date.today().year)
293+
294+
if object["source"] == PublicationSource.AUTHOR_PUBLICATION_ENTRY:
295+
self.fill(object)
296+
years = self._bin_citations_by_year(object.get("cites_per_year", {}), year_end)
289297
else:
290298
try:
291299
year_low = int(object["bib"]["pub_year"])
292-
year_end = int(datetime.date.today().year)
293300
except KeyError:
294-
self.logger.warning("Unknown publication year for paper %s, may result in incorrect number of citedby papers.", object["bib"]["title"])
301+
self.logger.warning("Unknown publication year for paper %s, may result in incorrect number "
302+
"of citedby papers.", object["bib"]["title"])
295303
return PublicationParser(self.__nav).citedby(object)
296304

297-
pub_id = int(object["citedby_url"].split("=")[1].split("&")[0])
298-
iter_list = []
299-
while year_low < year_end:
300-
iter_list.append(self.search_citedby(publication_id=pub_id, year_low=year_low, year_high=year_low+1))
301-
year_low += 1
302-
303-
return itertools.chain(*iter_list)
305+
# Go one year at a time in decreasing order
306+
years = zip(range(year_end, year_low-1, -1), range(year_end, year_low-1, -1))
307+
308+
# Extract the cites ID from the cited-by URL. Note: there may be several IDs, separated by commas.
309+
m = re.search("cites=[\d+,]*", object["citedby_url"])
310+
pub_id = m.group()[6:]
311+
for y_hi, y_lo in years:
312+
sub_citations = self.search_citedby(publication_id=pub_id, year_low=y_lo, year_high=y_hi)
313+
if sub_citations.total_results and (sub_citations.total_results > 1000):
314+
self.logger.warn("The paper titled %s has %d citations in the year %d. "
315+
"Due to the limitation in Google Scholar, fetching only 1000 results "
316+
"from that year.", object["bib"]["title"], sub_citations.total_results, y_lo)
317+
yield from sub_citations
304318

305319
def search_author_id(self, id: str, filled: bool = False, sortby: str = "citedby", publication_limit: int = 0)->Author:
306320
"""Search by author id and return a single Author object

0 commit comments

Comments
 (0)