2828from requests .packages .urllib3 .util .retry import Retry
2929from absl import logging
3030
31- # Constants from firebase_github.py
31+ # Constants for GitHub API interaction
3232RETRIES = 3
3333BACKOFF = 5
34- RETRY_STATUS = (403 , 500 , 502 , 504 )
35- TIMEOUT = 5
36- TIMEOUT_LONG = 20 # Not used in the functions we are copying, but good to have if expanding.
37-
38- OWNER = '' # Will be determined dynamically or from args
39- REPO = '' # Will be determined dynamically or from args
40- BASE_URL = 'https://api.github.com'
41- GITHUB_API_URL = '' # Will be set by set_repo_url_standalone
34+ RETRY_STATUS = (403 , 500 , 502 , 504 ) # HTTP status codes to retry on
35+ TIMEOUT = 5 # Default timeout for requests in seconds
36+ TIMEOUT_LONG = 20 # Longer timeout, currently not used by functions in this script
37+
38+ # Global variables for the target repository.
39+ # These are populated by set_repo_url_standalone() after determining
40+ # the owner and repo from arguments or git configuration.
41+ OWNER = ''
42+ REPO = ''
43+ BASE_URL = 'https://api.github.com' # Base URL for GitHub API
44+ GITHUB_API_URL = '' # Dynamically constructed API URL for the specific repository
4245
4346logging .set_verbosity (logging .INFO )
4447
@@ -72,40 +75,37 @@ def get_pull_request_review_comments(token, pull_number, since=None):
7275 headers = {'Accept' : 'application/vnd.github.v3+json' , 'Authorization' : f'token { token } ' }
7376
7477 page = 1
75- per_page = 100
78+ per_page = 100 # GitHub API default and max is 100 for many paginated endpoints
7679 results = []
7780
78- # Base parameters for the API request
7981 base_params = {'per_page' : per_page }
8082 if since :
81- base_params ['since' ] = since
83+ base_params ['since' ] = since # Filter comments by timestamp
8284
83- while True : # Loop indefinitely until explicitly broken
85+ while True :
8486 current_page_params = base_params .copy ()
8587 current_page_params ['page' ] = page
8688
8789 try :
8890 with requests_retry_session ().get (url , headers = headers , params = current_page_params ,
8991 stream = True , timeout = TIMEOUT ) as response :
9092 response .raise_for_status ()
91- # Log which page and if 'since' was used for clarity
9293 logging .info ("get_pull_request_review_comments: %s params %s response: %s" , url , current_page_params , response )
9394
9495 current_page_results = response .json ()
95- if not current_page_results : # No more results on this page
96- break # Exit loop, no more comments to fetch
96+ if not current_page_results : # No more data on this page
97+ break
9798
9899 results .extend (current_page_results )
99100
100- # If fewer results than per_page were returned, it's the last page
101- if len (current_page_results ) < per_page :
102- break # Exit loop, this was the last page
101+ if len (current_page_results ) < per_page : # Last page
102+ break
103103
104- page += 1 # Increment page for the next iteration
104+ page += 1
105105
106106 except requests .exceptions .RequestException as e :
107107 logging .error (f"Error fetching review comments (page { page } , params: { current_page_params } ) for PR { pull_number } : { e } " )
108- break # Stop trying if there's an error
108+ break
109109 return results
110110
111111
@@ -124,22 +124,19 @@ def list_pull_requests(token, state, head, base):
124124 if base : params .update ({'base' : base })
125125 page = page + 1
126126 keep_going = False
127- # Ensure GITHUB_API_URL is set before this function is called if OWNER/REPO are not passed explicitly
128- # For standalone script, OWNER and REPO are global and GITHUB_API_URL is set by set_repo_url_standalone
129127 try :
130128 with requests_retry_session ().get (url , headers = headers , params = params ,
131129 stream = True , timeout = TIMEOUT ) as response :
132130 logging .info ("list_pull_requests: %s params: %s response: %s" , url , params , response )
133- response .raise_for_status () # Raise an exception for bad status codes
131+ response .raise_for_status ()
134132 current_page_results = response .json ()
135- if not current_page_results : # No more results on this page
133+ if not current_page_results :
136134 break
137135 results .extend (current_page_results )
138- # If exactly per_page results were retrieved, read the next page.
139136 keep_going = (len (current_page_results ) == per_page )
140137 except requests .exceptions .RequestException as e :
141138 logging .error (f"Error listing pull requests (page { params .get ('page' , 'N/A' )} , params: { params } ) for { OWNER } /{ REPO } : { e } " )
142- break # Stop trying if there's an error
139+ break
143140 return results
144141
145142
@@ -158,21 +155,20 @@ def get_latest_pr_for_branch(token, owner, repo, branch_name):
158155 sys .stderr .write ("Owner and repo must be set to find PR for branch.\n " )
159156 return None
160157
161- head_branch_spec = f"{ owner } :{ branch_name } " # GitHub API requires owner in head spec for forks
162- prs = list_pull_requests (token = token , state = "open" , head = head_branch_spec , base = None ) # base can be None
158+ head_branch_spec = f"{ owner } :{ branch_name } " # Format required by GitHub API for head branch
159+ prs = list_pull_requests (token = token , state = "open" , head = head_branch_spec , base = None )
163160
164161 if not prs :
165162 return None
166163
167- # Sort PRs by creation date, most recent first
168- # PRs are dictionaries, 'created_at' is an ISO 8601 string
164+ # Sort PRs by creation date (most recent first) to find the latest.
169165 try :
170166 prs .sort (key = lambda pr : pr .get ("created_at" , "" ), reverse = True )
171- except Exception as e :
167+ except Exception as e : # Broad exception for safety, though sort issues are rare with valid data.
172168 sys .stderr .write (f"Could not sort PRs by creation date: { e } \n " )
173- return None # Or handle more gracefully
169+ return None
174170
175- if prs : # Check if list is not empty after sort
171+ if prs :
176172 return prs [0 ].get ("number" )
177173 return None
178174
@@ -187,21 +183,19 @@ def main():
187183 try :
188184 git_url_bytes = subprocess .check_output (["git" , "remote" , "get-url" , "origin" ], stderr = subprocess .PIPE )
189185 git_url = git_url_bytes .decode ().strip ()
190- # Regex for https://github.com/owner/repo.git or git@github.com:owner/repo.git
191186 match = re .search (r"(?:(?:https?://github\.com/)|(?:git@github\.com:))([^/]+)/([^/.]+)(?:\.git)?" , git_url )
192187 if match :
193188 determined_owner = match .group (1 )
194189 determined_repo = match .group (2 )
195190 sys .stderr .write (f"Determined repository: { determined_owner } /{ determined_repo } from git remote.\n " )
196191 except (subprocess .CalledProcessError , FileNotFoundError , UnicodeDecodeError ) as e :
197192 sys .stderr .write (f"Could not automatically determine repository from git remote: { e } \n " )
198- except Exception as e : # Catch any other unexpected error during git processing
193+ except Exception as e : # Catch any other unexpected error during git processing, though less likely.
199194 sys .stderr .write (f"An unexpected error occurred while determining repository: { e } \n " )
200195
201- # Helper function to parse owner/repo from URL
202196 def parse_repo_url (url_string ):
203- # Regex for https://github.com/owner/repo.git or git@github.com:owner/repo.git
204- # Also handles URLs without .git suffix
197+ """Parses owner and repository name from various GitHub URL formats."""
198+ # Handles https://github.com/owner/repo.git, git@github.com:owner/repo.git, and URLs without .git suffix.
205199 url_match = re .search (r"(?:(?:https?://github\.com/)|(?:git@github\.com:))([^/]+)/([^/.]+?)(?:\.git)?/?$" , url_string )
206200 if url_match :
207201 return url_match .group (1 ), url_match .group (2 )
@@ -212,11 +206,10 @@ def parse_repo_url(url_string):
212206 "Repository can be specified via --url, or --owner AND --repo, or auto-detected from git remote 'origin'." ,
213207 formatter_class = argparse .RawTextHelpFormatter
214208 )
215- # Arguments for repository specification
216209 parser .add_argument (
217210 "--pull_number" ,
218211 type = int ,
219- default = None , # Now optional
212+ default = None ,
220213 help = "Pull request number. If not provided, script attempts to find the latest open PR for the current git branch."
221214 )
222215 parser .add_argument (
@@ -228,13 +221,13 @@ def parse_repo_url(url_string):
228221 parser .add_argument (
229222 "--owner" ,
230223 type = str ,
231- default = determined_owner , # Default to auto-detected
224+ default = determined_owner ,
232225 help = f"Repository owner. Used if --url is not provided. { 'Default: ' + determined_owner if determined_owner else 'Required if --url is not used and not determinable from git.' } "
233226 )
234227 parser .add_argument (
235228 "--repo" ,
236229 type = str ,
237- default = determined_repo , # Default to auto-detected
230+ default = determined_repo ,
238231 help = f"Repository name. Used if --url is not provided. { 'Default: ' + determined_repo if determined_repo else 'Required if --url is not used and not determinable from git.' } "
239232 )
240233 parser .add_argument (
@@ -278,14 +271,8 @@ def parse_repo_url(url_string):
278271 final_owner = None
279272 final_repo = None
280273
281- # Logic for determining owner and repo:
282- # 1. If --url is provided, use it. Error if --owner or --repo are also explicitly set.
283- # 2. Else if --owner and --repo are provided, use them.
284- # 3. Else (neither --url nor --owner/--repo pair explicitly set), use auto-detected values (which are defaults for args.owner/args.repo).
285-
274+ # Determine repository owner and name
286275 if args .url :
287- # URL is provided, it takes precedence.
288- # Error if --owner or --repo were also explicitly set by the user (i.e., different from their defaults)
289276 owner_explicitly_set = args .owner is not None and args .owner != determined_owner
290277 repo_explicitly_set = args .repo is not None and args .repo != determined_repo
291278 if owner_explicitly_set or repo_explicitly_set :
@@ -301,44 +288,38 @@ def parse_repo_url(url_string):
301288 sys .stderr .write (f"Error: Invalid URL format: { args .url } . Expected https://github.com/owner/repo or git@github.com:owner/repo.git{ error_suffix } \n " )
302289 sys .exit (1 )
303290 else :
304- # URL is not provided, try to use owner/repo.
305- # These could be from explicit user input or from auto-detection (via defaults).
306291 is_owner_from_user = args .owner is not None and args .owner != determined_owner
307292 is_repo_from_user = args .repo is not None and args .repo != determined_repo
308- is_owner_from_default = args .owner is not None and args .owner == determined_owner and determined_owner is not None
309- is_repo_from_default = args .repo is not None and args .repo == determined_repo and determined_repo is not None
310293
311- if (is_owner_from_user or is_repo_from_user ): # User explicitly set at least one
312- if args .owner and args .repo : # Both were set (either explicitly or one explicit, one default that existed)
294+ if (is_owner_from_user or is_repo_from_user ): # User explicitly set at least one of owner/repo
295+ if args .owner and args .repo :
313296 final_owner = args .owner
314297 final_repo = args .repo
315298 sys .stderr .write (f"Using repository from --owner/--repo args: { final_owner } /{ final_repo } \n " )
316- else : # User set one but not the other, and the other wasn't available from a default
299+ else : # User set one but not the other ( and the other wasn't available from a default)
317300 sys .stderr .write (f"Error: Both --owner and --repo must be specified if one is provided explicitly (and --url is not used).{ error_suffix } \n " )
318301 sys .exit (1 )
319- elif args .owner and args .repo : # Both are from successful auto-detection (already printed "Determined repository...")
302+ elif args .owner and args .repo : # Both args have values, from successful auto-detection
320303 final_owner = args .owner
321304 final_repo = args .repo
322- elif args .owner or args .repo : # Only one was available (e.g. auto-detect only found one, or bad default state )
305+ elif args .owner or args .repo : # Only one has a value (e.g. auto-detect only found one)
323306 sys .stderr .write (f"Error: Both --owner and --repo are required if not using --url, and auto-detection was incomplete.{ error_suffix } \n " )
324307 sys .exit (1 )
325- # If none of the above, final_owner/repo remain None, will be caught by the check below .
308+ # If final_owner/repo are still None here, it means auto-detection failed and user provided nothing .
326309
327310 if not final_owner or not final_repo :
328311 sys .stderr .write (f"Error: Could not determine repository. Please specify --url, OR both --owner and --repo, OR ensure git remote 'origin' is configured correctly.{ error_suffix } \n " )
329312 sys .exit (1 )
330313
331-
332- if not final_owner or not final_repo :
333- sys .stderr .write (f"Error: Could not determine repository. Please specify --url, OR both --owner and --repo, OR ensure git remote 'origin' is configured correctly.{ error_suffix } \n " )
334- sys .exit (1 )
335-
336- if not set_repo_url_standalone (final_owner , final_repo ): # Sets global OWNER and REPO
314+ # Set global repository variables for API calls
315+ if not set_repo_url_standalone (final_owner , final_repo ):
316+ # This path should ideally not be reached if previous checks are robust.
337317 sys .stderr .write (f"Error: Could not set repository to { final_owner } /{ final_repo } . Ensure owner/repo are correct.{ error_suffix } \n " )
338318 sys .exit (1 )
339319
320+ # Determine Pull Request number
340321 pull_request_number = args .pull_number
341- current_branch_for_pr_check = None # Used to improve final error message
322+ current_branch_for_pr_check = None
342323 if not pull_request_number :
343324 sys .stderr .write ("Pull number not specified, attempting to find PR for current branch...\n " )
344325 current_branch_for_pr_check = get_current_branch_name ()
@@ -348,18 +329,18 @@ def parse_repo_url(url_string):
348329 if pull_request_number :
349330 sys .stderr .write (f"Found PR #{ pull_request_number } for branch { current_branch_for_pr_check } .\n " )
350331 else :
351- sys .stderr .write (f"No open PR found for branch { current_branch_for_pr_check } in { OWNER } /{ REPO } .\n " ) # This is informational, error below
332+ # Informational, actual error handled by `if not pull_request_number:` below
333+ sys .stderr .write (f"No open PR found for branch { current_branch_for_pr_check } in { OWNER } /{ REPO } .\n " )
352334 else :
353335 sys .stderr .write (f"Error: Could not determine current git branch. Cannot find PR automatically.{ error_suffix } \n " )
354336 sys .exit (1 )
355337
356338 if not pull_request_number :
357339 error_message = "Error: Pull request number is required."
358- if not args .pull_number and current_branch_for_pr_check : # Auto-detect branch ok, but no PR found
340+ if not args .pull_number and current_branch_for_pr_check :
359341 error_message = f"Error: Pull request number not specified and no open PR found for branch { current_branch_for_pr_check } ."
360- elif not args .pull_number and not current_branch_for_pr_check : # Should have been caught above
342+ elif not args .pull_number and not current_branch_for_pr_check : # Should have been caught and exited above.
361343 error_message = "Error: Pull request number not specified and could not determine current git branch."
362-
363344 sys .stderr .write (f"{ error_message } { error_suffix } \n " )
364345 sys .exit (1 )
365346
@@ -408,10 +389,10 @@ def parse_repo_url(url_string):
408389 if status_text == STATUS_OLD and args .exclude_old :
409390 continue
410391
411- # Track latest 'updated_at' for '--since' suggestion; 'created_at' is for display.
412392 updated_at_str = comment .get ("updated_at" )
413- if updated_at_str : # Check if updated_at_str is not None and not empty
393+ if updated_at_str :
414394 try :
395+ # Compatibility for Python < 3.11 which doesn't handle 'Z' suffix in fromisoformat
415396 if sys .version_info < (3 , 11 ):
416397 dt_str_updated = updated_at_str .replace ("Z" , "+00:00" )
417398 else :
@@ -422,12 +403,11 @@ def parse_repo_url(url_string):
422403 except ValueError :
423404 sys .stderr .write (f"Warning: Could not parse updated_at timestamp: { updated_at_str } \n " )
424405
425- # Get other comment details
426406 user = comment .get ("user" , {}).get ("login" , "Unknown user" )
427407 path = comment .get ("path" , "N/A" )
428408 body = comment .get ("body" , "" ).strip ()
429409
430- if not body :
410+ if not body : # Skip comments with no actual text content
431411 continue
432412
433413 processed_comments_count += 1
@@ -446,24 +426,19 @@ def parse_repo_url(url_string):
446426 print (f"* **URL**: <{ html_url } >\n " )
447427
448428 print ("\n ### Context:" )
449- print ("```" ) # Start of Markdown code block
429+ print ("```" )
450430 if diff_hunk and diff_hunk .strip ():
451- if args .context_lines == 0 : # User wants the full hunk
431+ if args .context_lines == 0 : # 0 means full hunk
452432 print (diff_hunk )
453- else : # User wants N lines of context (args.context_lines > 0)
433+ else : # Display header (if any) and last N lines
454434 hunk_lines = diff_hunk .split ('\n ' )
455- if hunk_lines and hunk_lines [0 ].startswith ("@@ " ):
435+ if hunk_lines and hunk_lines [0 ].startswith ("@@ " ): # Diff hunk header
456436 print (hunk_lines [0 ])
457- hunk_lines = hunk_lines [1 :] # Modify list in place for remaining operations
458-
459- # Proceed with the (potentially modified) hunk_lines
460- # If hunk_lines is empty here (e.g. original hunk was only a header that was removed),
461- # hunk_lines[-args.context_lines:] will be [], and "\n".join([]) is "",
462- # so print("") will effectively print a newline. This is acceptable.
437+ hunk_lines = hunk_lines [1 :]
463438 print ("\n " .join (hunk_lines [- args .context_lines :]))
464- else : # diff_hunk was None or empty
439+ else :
465440 print ("(No diff hunk available for this comment)" )
466- print ("```" ) # End of Markdown code block
441+ print ("```" )
467442
468443 print ("\n ### Comment:" )
469444 print (body )
@@ -473,15 +448,15 @@ def parse_repo_url(url_string):
473448
474449 if latest_activity_timestamp_obj :
475450 try :
476- # Ensure it's UTC before adding timedelta, then format
451+ # Suggest next command with '--since' pointing to just after the latest comment processed.
477452 next_since_dt = latest_activity_timestamp_obj .astimezone (timezone .utc ) + timedelta (seconds = 2 )
478453 next_since_str = next_since_dt .strftime ('%Y-%m-%dT%H:%M:%SZ' )
479454
480- new_cmd_args = [sys .executable , sys .argv [0 ]] # Start with interpreter and script path
481- i = 1 # Start checking from actual arguments in sys.argv
455+ new_cmd_args = [sys .executable , sys .argv [0 ]]
456+ i = 1
482457 while i < len (sys .argv ):
483- if sys .argv [i ] == "--since" :
484- i += 2 # Skip --since and its value
458+ if sys .argv [i ] == "--since" : # Skip existing --since argument and its value
459+ i += 2
485460 continue
486461 new_cmd_args .append (sys .argv [i ])
487462 i += 1
0 commit comments