Skip to content

Commit d158ad9

Browse files
committed
Convert change detection to a Python script
1 parent bb5c687 commit d158ad9

File tree

2 files changed

+218
-107
lines changed

2 files changed

+218
-107
lines changed
Lines changed: 43 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: Reusable change detection
22

3-
on: # yamllint disable-line rule:truthy
3+
on: # yamllint disable-line rule:truthy
44
workflow_call:
55
outputs:
66
# Some of the referenced steps set outputs conditionally and there may be
@@ -50,124 +50,60 @@ jobs:
5050
name: Compute changed files
5151
runs-on: ubuntu-latest
5252
timeout-minutes: 10
53+
env:
54+
branch_base: 'origin/${{ github.event.pull_request.base.ref }}'
55+
branch_pr: 'origin/${{ github.event.pull_request.head.ref }}'
56+
commits: ${{ github.event.pull_request.commits }}
57+
refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}'
58+
refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}'
5359
outputs:
5460
config-hash: ${{ steps.config-hash.outputs.hash }}
55-
run-cifuzz: ${{ steps.check.outputs.run-cifuzz }}
56-
run-docs: ${{ steps.docs-changes.outputs.run-docs }}
57-
run-hypothesis: ${{ steps.check.outputs.run-hypothesis }}
58-
run-tests: ${{ steps.check.outputs.run-tests }}
59-
run-win-msi: ${{ steps.win-msi-changes.outputs.run-win-msi }}
61+
run-cifuzz: ${{ steps.changes.outputs.run-cifuzz }}
62+
run-docs: ${{ steps.changes.outputs.run-docs }}
63+
run-hypothesis: ${{ steps.changes.outputs.run-hypothesis }}
64+
run-tests: ${{ steps.changes.outputs.run-tests }}
65+
run-win-msi: ${{ steps.changes.outputs.run-win-msi }}
6066
steps:
67+
- uses: actions/setup-python@v5
68+
with:
69+
python-version: "3"
70+
6171
- run: >-
6272
echo '${{ github.event_name }}'
73+
6374
- uses: actions/checkout@v4
6475
with:
6576
persist-credentials: false
66-
- name: Check for source changes
67-
id: check
77+
ref: >-
78+
${{
79+
github.event_name == 'pull_request'
80+
&& github.event.pull_request.head.sha
81+
|| ''
82+
}}
83+
84+
# Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721
85+
- name: Fetch commits to get branch diff
86+
if: github.event_name == 'pull_request'
6887
run: |
69-
if [ -z "$GITHUB_BASE_REF" ]; then
70-
echo "run-tests=true" >> "$GITHUB_OUTPUT"
71-
else
72-
git fetch origin "$GITHUB_BASE_REF" --depth=1
73-
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) may be more
74-
# reliable than git diff "origin/$GITHUB_BASE_REF.." (2 dots),
75-
# but it requires to download more commits (this job uses
76-
# "git fetch --depth=1").
77-
#
78-
# git diff "origin/$GITHUB_BASE_REF..." (3 dots) works with Git
79-
# 2.26, but Git 2.28 is stricter and fails with "no merge base".
80-
#
81-
# git diff "origin/$GITHUB_BASE_REF.." (2 dots) should be enough on
82-
# GitHub, since GitHub starts by merging origin/$GITHUB_BASE_REF
83-
# into the PR branch anyway.
84-
#
85-
# https://github.com/python/core-workflow/issues/373
86-
grep_ignore_args=(
87-
# file extensions
88-
-e '\.md$'
89-
-e '\.rst$'
90-
# top-level folders
91-
-e '^Doc/'
92-
-e '^Misc/'
93-
# configuration files
94-
-e '^\.github/CODEOWNERS$'
95-
-e '^\.pre-commit-config\.yaml$'
96-
-e '\.ruff\.toml$'
97-
-e 'mypy\.ini$'
98-
)
99-
git diff --name-only "origin/$GITHUB_BASE_REF.." \
100-
| grep -qvE "${grep_ignore_args[@]}" \
101-
&& echo "run-tests=true" >> "$GITHUB_OUTPUT" || true
102-
fi
88+
# Fetch enough history to find a common ancestor commit (aka merge-base):
89+
git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \
90+
--no-tags --prune --no-recurse-submodules
91+
92+
# This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from):
93+
COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" )
94+
DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" )
95+
96+
# Get all commits since that commit date from the base branch (eg: master or main):
97+
git fetch origin "${refspec_base}" --shallow-since="${DATE}" \
98+
--no-tags --prune --no-recurse-submodules
10399
104-
# Check if we should run hypothesis tests
105-
GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
106-
echo "$GIT_BRANCH"
107-
if $(echo "$GIT_BRANCH" | grep -q -w '3\.\(8\|9\|10\|11\)'); then
108-
echo "Branch too old for hypothesis tests"
109-
echo "run-hypothesis=false" >> "$GITHUB_OUTPUT"
110-
else
111-
echo "Run hypothesis tests"
112-
echo "run-hypothesis=true" >> "$GITHUB_OUTPUT"
113-
fi
100+
# We only want to run tests on PRs when related files are changed,
101+
# or when someone triggers a manual workflow run.
102+
- name: Compute changed files
103+
id: changes
104+
run: python Tools/build/compute-changes.py "${branch_base}" "${branch_pr}"
114105

115-
# oss-fuzz maintains a configuration for fuzzing the main branch of
116-
# CPython, so CIFuzz should be run only for code that is likely to be
117-
# merged into the main branch; compatibility with older branches may
118-
# be broken.
119-
FUZZ_RELEVANT_FILES='(\.c$|\.h$|\.cpp$|^configure$|^\.github/workflows/build\.yml$|^Modules/_xxtestfuzz)'
120-
if [ "$GITHUB_BASE_REF" = "main" ] && [ "$(git diff --name-only "origin/$GITHUB_BASE_REF.." | grep -qE $FUZZ_RELEVANT_FILES; echo $?)" -eq 0 ]; then
121-
# The tests are pretty slow so they are executed only for PRs
122-
# changing relevant files.
123-
echo "Run CIFuzz tests"
124-
echo "run-cifuzz=true" >> "$GITHUB_OUTPUT"
125-
else
126-
echo "Branch too old for CIFuzz tests; or no C files were changed"
127-
echo "run-cifuzz=false" >> "$GITHUB_OUTPUT"
128-
fi
129106
- name: Compute hash for config cache key
130107
id: config-hash
131108
run: |
132109
echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT"
133-
- name: Get a list of the changed documentation-related files
134-
if: github.event_name == 'pull_request'
135-
id: changed-docs-files
136-
uses: Ana06/get-changed-files@v2.3.0
137-
with:
138-
filter: |
139-
Doc/**
140-
Misc/**
141-
.github/workflows/reusable-docs.yml
142-
format: csv # works for paths with spaces
143-
- name: Check for docs changes
144-
# We only want to run this on PRs when related files are changed,
145-
# or when user triggers manual workflow run.
146-
if: >-
147-
(
148-
github.event_name == 'pull_request'
149-
&& steps.changed-docs-files.outputs.added_modified_renamed != ''
150-
) || github.event_name == 'workflow_dispatch'
151-
id: docs-changes
152-
run: |
153-
echo "run-docs=true" >> "${GITHUB_OUTPUT}"
154-
- name: Get a list of the MSI installer-related files
155-
if: github.event_name == 'pull_request'
156-
id: changed-win-msi-files
157-
uses: Ana06/get-changed-files@v2.3.0
158-
with:
159-
filter: |
160-
Tools/msi/**
161-
.github/workflows/reusable-windows-msi.yml
162-
format: csv # works for paths with spaces
163-
- name: Check for changes in MSI installer-related files
164-
# We only want to run this on PRs when related files are changed,
165-
# or when user triggers manual workflow run.
166-
if: >-
167-
(
168-
github.event_name == 'pull_request'
169-
&& steps.changed-win-msi-files.outputs.added_modified_renamed != ''
170-
) || github.event_name == 'workflow_dispatch'
171-
id: win-msi-changes
172-
run: |
173-
echo "run-win-msi=true" >> "${GITHUB_OUTPUT}"

Tools/build/compute-changes.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import subprocess
5+
from dataclasses import dataclass
6+
from collections.abc import Set
7+
from pathlib import Path
8+
9+
# Locations inside the repository that get special treatment when
# classifying changed files.
GITHUB_WORKFLOWS_PATH = Path(".github", "workflows")
GITHUB_CODEOWNERS_PATH = Path(".github", "CODEOWNERS")

# File names (matched in any directory) of tooling configuration; a change
# to one of these does not, on its own, require running the test suite.
CONFIGURATION_FILE_NAMES = frozenset(
    (".pre-commit-config.yaml", ".ruff.toml", "mypy.ini")
)

# File suffixes used to recognise documentation and C/C++ sources.
SUFFIXES_DOCUMENTATION = frozenset((".md", ".rst"))
SUFFIXES_C_OR_CPP = frozenset((".c", ".cpp", ".h"))
16+
17+
18+
@dataclass(kw_only=True, slots=True)
19+
class Outputs:
20+
run_ci_fuzz: bool = False
21+
run_docs: bool = False
22+
run_hypothesis: bool = False
23+
run_tests: bool = False
24+
run_win_msi: bool = False
25+
26+
27+
def compute_changes() -> None:
    """Decide which CI jobs to run and publish the decision.

    Determines the target branch, lists the changed files, derives the
    job flags from both, prints a short summary and finally hands the flags
    to ``write_github_output``.

    The refs to compare are taken from the command line
    (``compute-changes.py BASE_REF PR_REF``) when the run has a pull-request
    base ref (``GITHUB_BASE_REF`` is non-empty). In every other case the
    defaults of ``get_changed_files`` are used, exactly as before.
    """
    # Local import: only this entry point needs access to the arguments.
    import sys

    target_branch = get_git_base_branch()

    # The workflow passes the base and PR refs as arguments. Outside of pull
    # requests those arguments are built from empty values and are not valid
    # refs, so they are honoured only when a base ref exists.
    cli_refs = sys.argv[1:]
    if len(cli_refs) == 2 and os.environ.get("GITHUB_BASE_REF", ""):
        changed_files = get_changed_files(*cli_refs)
    else:
        changed_files = get_changed_files()

    outputs = process_changed_files(changed_files)
    outputs = process_target_branch(outputs, target_branch)

    if outputs.run_tests:
        print("Run tests")

    if outputs.run_hypothesis:
        print("Run hypothesis tests")

    if outputs.run_ci_fuzz:
        print("Run CIFuzz tests")
    else:
        print("Branch too old for CIFuzz tests; or no C files were changed")

    if outputs.run_docs:
        print("Build documentation")

    if outputs.run_win_msi:
        print("Build Windows MSI")

    write_github_output(outputs)
51+
52+
53+
def get_changed_files(ref_a: str = "main", ref_b: str = "HEAD") -> Set[Path]:
    """Return the paths of all files that differ between two Git refs.

    Runs ``git diff --name-only -z ref_a...ref_b --`` in the current working
    directory. No filtering by change type is applied: every path git
    reports is included.

    ``-z`` makes git emit NUL-terminated, unquoted paths. Without it, git
    wraps paths containing unusual characters in quotes and escapes them,
    which would corrupt the name and suffix used to classify the file.

    Raises:
        subprocess.CalledProcessError: if ``git diff`` exits with a non-zero
            status (for example because one of the refs is unknown).
    """
    diff_result = subprocess.run(
        ("git", "diff", "--name-only", "-z", f"{ref_a}...{ref_b}", "--"),
        capture_output=True,
        check=True,
        encoding="utf-8",
        # Paths are now emitted raw, so keep undecodable bytes instead of
        # failing on a file name that is not valid UTF-8.
        errors="surrogateescape",
    )
    # The output ends with a NUL, so the final split element is empty;
    # filter(None, ...) drops it (and yields nothing for an empty diff).
    return frozenset(map(Path, filter(None, diff_result.stdout.split("\0"))))
63+
64+
65+
def get_git_base_branch() -> str:
    """Return the name of the branch this run targets.

    Uses ``GITHUB_BASE_REF`` when it is set and non-empty. Otherwise falls
    back to ``GITHUB_REF`` with a leading ``refs/heads/`` removed, so that
    runs without a base ref are still matched against the branch they run
    on. Returns ``""`` when neither variable is set.

    The chosen name is also printed for the job log.
    """
    git_branch = os.environ.get("GITHUB_BASE_REF") or os.environ.get(
        "GITHUB_REF", ""
    )
    git_branch = git_branch.removeprefix("refs/heads/")
    print(f"target branch: {git_branch!r}")
    return git_branch
70+
71+
72+
def process_changed_files(changed_files: Set[Path]) -> Outputs:
    """Derive the job flags implied by a set of changed file paths.

    Each path may switch on one or more of ``run_tests``, ``run_ci_fuzz``,
    ``run_docs`` and ``run_win_msi``; a flag, once on, stays on.
    ``run_hypothesis`` is never touched here and keeps its default.
    """
    # Workflow files (directly inside GITHUB_WORKFLOWS_PATH) that force
    # specific jobs, mapped to the flags they switch on.
    workflow_triggers = {
        "build.yml": ("run_tests", "run_ci_fuzz"),
        "reusable-docs.yml": ("run_docs",),
        "reusable-windows-msi.yml": ("run_win_msi",),
    }
    # Leading path components that are relevant for fuzzing.
    fuzz_prefixes = {("configure",), ("Modules", "_xxtestfuzz")}

    wanted = Outputs()

    for path in changed_files:
        parts = path.parts
        leading = parts[:2]
        name = path.name
        suffix = path.suffix

        # Anything under Doc/ or Misc/, or with a documentation suffix,
        # counts as documentation.
        is_doc = parts[0] in {"Doc", "Misc"} or suffix in SUFFIXES_DOCUMENTATION

        if path.parent == GITHUB_WORKFLOWS_PATH:
            for flag in workflow_triggers.get(name, ()):
                setattr(wanted, flag, True)

        # Documentation, CODEOWNERS and tooling configuration do not need
        # the test suite; every other change does.
        skips_tests = (
            is_doc
            or path == GITHUB_CODEOWNERS_PATH
            or name in CONFIGURATION_FILE_NAMES
        )
        if not skips_tests:
            wanted.run_tests = True

        # The fuzz tests are pretty slow, so they only run for changes to
        # C/C++ sources or to the fuzz-relevant paths above.
        if suffix in SUFFIXES_C_OR_CPP or leading in fuzz_prefixes:
            wanted.run_ci_fuzz = True

        if is_doc:
            wanted.run_docs = True

        # MSI installer sources live under Tools/msi.
        if leading == ("Tools", "msi"):
            wanted.run_win_msi = True

    return wanted
132+
133+
134+
def process_target_branch(outputs: Outputs, git_branch: str) -> Outputs:
    """Adjust the job flags for the branch being targeted.

    ``outputs`` is modified in place and returned:

    * ``run_tests`` is forced on when ``GITHUB_BASE_REF`` is unset or empty;
    * ``run_hypothesis`` follows ``run_tests``, except on the 3.8-3.11
      branches where it is always off;
    * ``run_ci_fuzz`` is switched off unless the branch is ``main``.
    """
    has_base_ref = bool(os.environ.get("GITHUB_BASE_REF", ""))
    if not has_base_ref:
        outputs.run_tests = True

    # Hypothesis tests are skipped on these older branches.
    too_old_for_hypothesis = git_branch in ("3.8", "3.9", "3.10", "3.11")
    if too_old_for_hypothesis:
        print("Branch too old for hypothesis tests")
    outputs.run_hypothesis = (
        False if too_old_for_hypothesis else outputs.run_tests
    )

    # oss-fuzz maintains a configuration for fuzzing the main branch of
    # CPython, so CIFuzz should only run for code that is likely to be
    # merged into main; compatibility with older branches may be broken.
    if git_branch == "main":
        return outputs
    outputs.run_ci_fuzz = False
    return outputs
153+
154+
155+
def write_github_output(outputs: Outputs) -> None:
    """Append the job flags to the file named by ``GITHUB_OUTPUT``.

    One ``name=value`` pair is written per line, with ``true``/``false`` as
    the value. Each pair needs its own line; otherwise all five pairs run
    together into one unparseable entry.

    If ``GITHUB_OUTPUT`` is unset or empty, a notice is printed and nothing
    is written.
    """
    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-output-parameter
    output_path = os.environ.get("GITHUB_OUTPUT")
    if not output_path:
        print("GITHUB_OUTPUT not defined!")
        return

    flags = {
        "run-cifuzz": outputs.run_ci_fuzz,
        "run-docs": outputs.run_docs,
        "run-hypothesis": outputs.run_hypothesis,
        "run-tests": outputs.run_tests,
        "run-win-msi": outputs.run_win_msi,
    }
    with open(output_path, "a", encoding="utf-8") as f:
        f.writelines(
            f"{name}={bool_lower(flag)}\n" for name, flag in flags.items()
        )


def bool_lower(value: bool, /) -> str:
    """Return ``"true"`` for a truthy *value* and ``"false"`` otherwise."""
    return "true" if value else "false"
172+
173+
174+
if __name__ == "__main__":
    # Script entry point: compute_changes() returns None, so this exits
    # with status 0 once it has finished.
    raise SystemExit(compute_changes())

0 commit comments

Comments
 (0)