From ef36523594ab3723e03c3be4a382e96988c2d483 Mon Sep 17 00:00:00 2001 From: Usama Khalil Date: Thu, 13 Nov 2025 17:43:29 +0200 Subject: [PATCH] Fix regex pattern for citedby_url extraction - Prefix the regex string literal with r to make it a raw string: m = re.search(r"cites=[\d+,]*", object["citedby_url"]) - This tells Python to treat backslashes literally, preventing them from being interpreted as the start of an escape sequence. - Fixes: scholarly/_scholarly.py:312: SyntaxWarning: invalid escape sequence '\d' m = re.search("cites=[\d+,]*", object["citedby_url"]) --- scholarly/_scholarly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scholarly/_scholarly.py b/scholarly/_scholarly.py index 4f64f51..de0035e 100644 --- a/scholarly/_scholarly.py +++ b/scholarly/_scholarly.py @@ -309,7 +309,7 @@ def citedby(self, object: Publication)->_SearchScholarIterator: def _citedby_long(self, object: Publication, years): # Extract cites_id. Note: There could be multiple ones, separated by commas. - m = re.search("cites=[\d+,]*", object["citedby_url"]) + m = re.search(r"cites=[\d+,]*", object["citedby_url"]) pub_id = m.group()[6:] for y_hi, y_lo in years: sub_citations = self.search_citedby(publication_id=pub_id, year_low=y_lo, year_high=y_hi)