From 94da00ee7d7568b1856b739c9c4582582caa14d5 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 14 May 2021 21:56:30 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Filter=20PRs=20by?= =?UTF-8?q?=20checking=20for=20commits=20in=20branch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a branch is specified PRs are filtered by checking their PR base branch. This fails to pick up all PRs when multiple PRs are merged into one branch, followed by that branch being merged into the mainline via a single PR. If the `--since` argument is a git ref (_not_ a date) this is used to get a list of commits in the requested branch after `--since`. The list of PRs is then filtered by checking whether the PR commit appears in this list of branch commits. If the commit history for merged PRs/branches is maintained this should ensure PRs merged into one branch that are subsequently merged all together into another branch are picked up. Closes https://github.com/executablebooks/github-activity/issues/50 --- github_activity/github_activity.py | 65 +++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/github_activity/github_activity.py b/github_activity/github_activity.py index dc283e9..16f55ea 100644 --- a/github_activity/github_activity.py +++ b/github_activity/github_activity.py @@ -258,10 +258,32 @@ def generate_activity_md( data["kind"] = data["url"].map(lambda a: "issue" if "issues/" in a else "pr") # Filter the PRs by branch (or ref) if given + # + # If `since` is a git ref then check commits instead of the PR base to handle + # multi-branch repos where multiple PRs are merged into one branch, and that + # branch is subsequently merged into the mainline in another PR. 
+ # + # This is the equivalent of `git merge-base --is-ancestor $sha $branch` + # https://stackoverflow.com/questions/43535132/given-a-commit-id-how-to-determine-if-current-branch-contains-the-commit if branch is not None: - index_names = data[ - (data["kind"] == "pr") & (data["baseRefName"] != branch) - ].index + if data.since_is_git_ref: + branch_commits = set(_get_commit_shas(org, repo, branch, since)) + index_names = data[ + ~data.apply( + lambda r: bool( + r["kind"] != "pr" + or ( + r["mergeCommit"] + and r["mergeCommit"]["oid"] in branch_commits + ) + ), + axis=1, + ) + ].index + else: + index_names = data[ + (data["kind"] == "pr") & (data["baseRefName"] != branch) + ].index data.drop(index_names, inplace=True) if data.empty: return @@ -540,12 +562,19 @@ def _get_datetime_and_type(org, repo, datetime_or_git_ref): ) -def _get_datetime_from_git_ref(org, repo, ref): - """Return a datetime from a git reference.""" +def _get_commit_from_git_ref(org, repo, ref): + """Return a GitHub commit from a git reference.""" response = requests.get(f"https://api.github.com/repos/{org}/{repo}/commits/{ref}") response.raise_for_status() - return dateutil.parser.parse(response.json()["commit"]["committer"]["date"]) + return response.json() + + +def _get_datetime_from_git_ref(org, repo, ref): + """Return a datetime from a git reference.""" + + commit = _get_commit_from_git_ref(org, repo, ref) + return dateutil.parser.parse(commit["commit"]["committer"]["date"]) def _get_latest_tag(org, repo): @@ -553,3 +582,27 @@ def _get_latest_tag(org, repo): out = run("git describe --tags".split(), stdout=PIPE) tag = out.stdout.decode().rsplit("-", 2)[0] return tag + + +def _get_commit_shas(org, repo, branch, since): + """Return all commit SHAs in a branch after `since` which must be a git ref.""" + + since_sha = _get_commit_from_git_ref(org, repo, since)["sha"] + branch_shas = [] + page_size = 100 + page = 0 + while True: + # https://docs.github.com/en/rest/reference/repos#commits + page += 1 
+ response = requests.get( + f"https://api.github.com/repos/{org}/{repo}/commits?sha={branch}&per_page={page_size}&page={page}" + ) + response.raise_for_status() + commits = response.json() + for c in commits: + if c["sha"] == since_sha: + return branch_shas + branch_shas.append(c["sha"]) + if len(commits) < page_size: + break + raise ValueError(f"Git ref {since_sha} not found in {branch}") From 86114c9723330bb789bd4a4c08eeadb1a9ff9ab8 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 14 May 2021 23:16:53 +0100 Subject: [PATCH 2/2] Simplify branch commit inclusion check --- github_activity/github_activity.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/github_activity/github_activity.py b/github_activity/github_activity.py index 16f55ea..8e4a5a2 100644 --- a/github_activity/github_activity.py +++ b/github_activity/github_activity.py @@ -269,12 +269,12 @@ def generate_activity_md( if data.since_is_git_ref: branch_commits = set(_get_commit_shas(org, repo, branch, since)) index_names = data[ - ~data.apply( - lambda r: bool( - r["kind"] != "pr" - or ( - r["mergeCommit"] - and r["mergeCommit"]["oid"] in branch_commits + data.apply( + lambda r: ( + r["kind"] == "pr" + and ( + not r["mergeCommit"] + or r["mergeCommit"]["oid"] not in branch_commits ) ), axis=1,