Skip to content

Commit 6dfba7a

Browse files
authored
Merge pull request #449 from 0x2b3bfa0/patch-1
Use cursor-based pagination
2 parents 72d35a9 + 7551829 commit 6dfba7a

File tree

1 file changed

+40
-29
lines changed

1 file changed

+40
-29
lines changed

github_backup/github_backup.py

Lines changed: 40 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -592,27 +592,26 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
592592
auth = get_auth(args, encode=not args.as_app)
593593
query_args = get_query_args(query_args)
594594
per_page = 100
595-
page = 0
595+
next_url = None
596596

597597
while True:
598598
if single_request:
599-
request_page, request_per_page = None, None
599+
request_per_page = None
600600
else:
601-
page = page + 1
602-
request_page, request_per_page = page, per_page
601+
request_per_page = per_page
603602

604603
request = _construct_request(
605604
request_per_page,
606-
request_page,
607605
query_args,
608-
template,
606+
next_url or template,
609607
auth,
610608
as_app=args.as_app,
611609
fine=True if args.token_fine is not None else False,
612610
) # noqa
613-
r, errors = _get_response(request, auth, template)
611+
r, errors = _get_response(request, auth, next_url or template)
614612

615613
status_code = int(r.getcode())
614+
616615
# Check if we got correct data
617616
try:
618617
response = json.loads(r.read().decode("utf-8"))
@@ -644,15 +643,14 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
644643
retries += 1
645644
time.sleep(5)
646645
request = _construct_request(
647-
per_page,
648-
page,
646+
request_per_page,
649647
query_args,
650-
template,
648+
next_url or template,
651649
auth,
652650
as_app=args.as_app,
653651
fine=True if args.token_fine is not None else False,
654652
) # noqa
655-
r, errors = _get_response(request, auth, template)
653+
r, errors = _get_response(request, auth, next_url or template)
656654

657655
status_code = int(r.getcode())
658656
try:
@@ -682,7 +680,16 @@ def retrieve_data_gen(args, template, query_args=None, single_request=False):
682680
if type(response) is list:
683681
for resp in response:
684682
yield resp
685-
if len(response) < per_page:
683+
# Parse Link header for next page URL (cursor-based pagination)
684+
link_header = r.headers.get("Link", "")
685+
next_url = None
686+
if link_header:
687+
# Parse Link header: <https://api.github.com/...?per_page=100&after=cursor>; rel="next"
688+
for link in link_header.split(","):
689+
if 'rel="next"' in link:
690+
next_url = link[link.find("<") + 1:link.find(">")]
691+
break
692+
if not next_url:
686693
break
687694
elif type(response) is dict and single_request:
688695
yield response
@@ -735,22 +742,27 @@ def _get_response(request, auth, template):
735742

736743

737744
def _construct_request(
738-
per_page, page, query_args, template, auth, as_app=None, fine=False
745+
per_page, query_args, template, auth, as_app=None, fine=False
739746
):
740-
all_query_args = {}
741-
if per_page:
742-
all_query_args["per_page"] = per_page
743-
if page:
744-
all_query_args["page"] = page
745-
if query_args:
746-
all_query_args.update(query_args)
747-
748-
request_url = template
749-
if all_query_args:
750-
querystring = urlencode(all_query_args)
751-
request_url = template + "?" + querystring
747+
# If template is already a full URL with query params (from Link header), use it directly
748+
if "?" in template and template.startswith("http"):
749+
request_url = template
750+
# Extract query string for logging
751+
querystring = template.split("?", 1)[1]
752752
else:
753-
querystring = ""
753+
# Build URL with query parameters
754+
all_query_args = {}
755+
if per_page:
756+
all_query_args["per_page"] = per_page
757+
if query_args:
758+
all_query_args.update(query_args)
759+
760+
request_url = template
761+
if all_query_args:
762+
querystring = urlencode(all_query_args)
763+
request_url = template + "?" + querystring
764+
else:
765+
querystring = ""
754766

755767
request = Request(request_url)
756768
if auth is not None:
@@ -766,7 +778,7 @@ def _construct_request(
766778
"Accept", "application/vnd.github.machine-man-preview+json"
767779
)
768780

769-
log_url = template
781+
log_url = template if "?" not in template else template.split("?")[0]
770782
if querystring:
771783
log_url += "?" + querystring
772784
logger.info("Requesting {}".format(log_url))
@@ -843,8 +855,7 @@ def download_file(url, path, auth, as_app=False, fine=False):
843855
return
844856

845857
request = _construct_request(
846-
per_page=100,
847-
page=1,
858+
per_page=None,
848859
query_args={},
849860
template=url,
850861
auth=auth,

0 commit comments

Comments
 (0)