Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
d145747
LLVM 20 bump
savannahostrowski Sep 18, 2025
d86b66e
Merge main
savannahostrowski Sep 19, 2025
9324b14
Add flags for testing
savannahostrowski Sep 19, 2025
f3bb6b9
Merge branch 'main' into llvm-20
savannahostrowski Sep 20, 2025
b6e7981
Merge main
savannahostrowski Oct 6, 2025
e867644
Fix windows
savannahostrowski Oct 6, 2025
13e9f5b
Merge main
savannahostrowski Oct 7, 2025
84781b4
Merge branch 'main' into llvm-20
savannahostrowski Oct 8, 2025
0034f14
Download binaries from GitHub releases
emmatyping Aug 6, 2025
cc98d30
Add hash checking
emmatyping Aug 7, 2025
76842eb
Apply Emma's commits for grabbing binaries from release artifacts
savannahostrowski Oct 8, 2025
a732cec
Merge branch 'main' into llvm-20
savannahostrowski Oct 11, 2025
e8395ce
Fix up LLVM via release artifacts
savannahostrowski Oct 11, 2025
d1e4363
Merge branch 'main' into llvm-20
savannahostrowski Oct 12, 2025
01aed67
Remove model flags for x86_64 darwin causing GOT relocation issues
savannahostrowski Oct 12, 2025
94f1a89
Clean up
savannahostrowski Oct 12, 2025
e6450de
Only patch x86_64 GOT relocations when relaxation succeeds
savannahostrowski Oct 13, 2025
1adf827
Revert "Only patch x86_64 GOT relocations when relaxation succeeds"
savannahostrowski Oct 13, 2025
b9bfacf
mcmodel=large
savannahostrowski Oct 14, 2025
57c44ee
Add macro to handle debug
savannahostrowski Oct 16, 2025
081ee86
fno-pic
savannahostrowski Oct 16, 2025
0b773f9
remove fno-pic
savannahostrowski Oct 16, 2025
c78af6f
remove hack
savannahostrowski Oct 18, 2025
d715cf2
Trampoline attempt
savannahostrowski Oct 18, 2025
38e11b9
Touch up and add better comments
savannahostrowski Oct 19, 2025
74eb9a4
Merge main
savannahostrowski Oct 19, 2025
ca95652
More clean up
savannahostrowski Oct 19, 2025
47153a5
📜🤖 Added by blurb_it.
blurb-it[bot] Oct 19, 2025
7b2df52
Redupe LLVM version reference from jit.yml
savannahostrowski Oct 21, 2025
3a5af39
Merge branch 'llvm-20' of https://github.com/savannahostrowski/cpytho…
savannahostrowski Oct 21, 2025
450dd09
Simplify fetching LLVM from bin-deps
savannahostrowski Oct 21, 2025
5da3349
Remove duplicate 20 reference in jit.yml
savannahostrowski Oct 21, 2025
620cd4f
Fix flags
savannahostrowski Oct 21, 2025
d1a68ab
Reduce DATA_ALIGN to 8 bytes
savannahostrowski Oct 21, 2025
f3d46bd
Merge branch 'main' into llvm-20
savannahostrowski Oct 21, 2025
4b35c2f
Update Devcontainer readme
savannahostrowski Oct 22, 2025
ffcb68e
Merge branch 'llvm-20' of https://github.com/savannahostrowski/cpytho…
savannahostrowski Oct 22, 2025
0540ba3
Merge branch 'main' into llvm-20
savannahostrowski Oct 30, 2025
618a4a6
Revert CI simplication and address comments for memcpy
savannahostrowski Oct 31, 2025
2e9ba92
Address comments about get_externals and trampoline helper
savannahostrowski Oct 31, 2025
a21cb31
Merge branch 'main' into llvm-20
savannahostrowski Oct 31, 2025
795f466
Restore get_externals
savannahostrowski Oct 31, 2025
b880f26
Fix spacing
savannahostrowski Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/jit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
- true
- false
llvm:
- 19
- 20
include:
- target: i686-pc-windows-msvc/msvc
architecture: Win32
Expand Down Expand Up @@ -138,7 +138,7 @@ jobs:
fail-fast: false
matrix:
llvm:
- 19
- 20
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -166,7 +166,7 @@ jobs:
fail-fast: false
matrix:
llvm:
- 19
- 20
steps:
- uses: actions/checkout@v4
with:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update JIT compilation to use LLVM 20 at build time.
63 changes: 51 additions & 12 deletions PCbuild/get_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import os
import pathlib
import shutil
import sys
import time
import urllib.error
Expand All @@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
)
except (urllib.error.URLError, ConnectionError) as ex:
if attempt == max_retries:
msg = f"Download from {download_location} failed."
raise OSError(msg) from ex
raise OSError(f'Download from {download_location} failed.') from ex
time.sleep(2.25**attempt)
else:
return resp


def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
repo = f'cpython-{"bin" if binary else "source"}-deps'
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
reporthook = None
if verbose:
Expand All @@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
return filename


def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``{tag}.tar.xz`` release asset for *tag*.

    The asset is fetched from the GitHub releases of the
    ``{org}/cpython-bin-deps`` repository and stored in *tarball_dir*
    (created, including parents, if it does not exist yet).

    When *verbose* is true, ``print`` is passed to the downloader as its
    report hook; otherwise no hook is used.

    Returns the path of the downloaded tarball.
    """
    archive_name = f'{tag}.tar.xz'
    url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{archive_name}'
    tarball_dir.mkdir(parents=True, exist_ok=True)
    output_path = tarball_dir / archive_name
    retrieve_with_retries(url, output_path, print if verbose else None)
    return output_path


def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack *tarball_path* into ``externals_dir / tag`` and return that path.

    The archive format is inferred from the file name by
    ``shutil.unpack_archive``.

    If unpacking fails (or is interrupted) and the destination directory
    did not exist before this call, the partially populated directory is
    removed before the exception is re-raised.  Callers treat the mere
    existence of ``externals_dir / tag`` as "dependency already present",
    so a half-extracted tree must not be left behind.
    """
    output_path = externals_dir / tag
    # Only clean up what this call created; never delete a pre-existing tree.
    existed_before = output_path.exists()
    try:
        shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
    except BaseException:
        # BaseException so that Ctrl-C mid-extraction is also cleaned up.
        if not existed_before:
            shutil.rmtree(output_path, ignore_errors=True)
        raise
    return output_path


def extract_zip(externals_dir, zip_path):
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
zf.extractall(os.fspath(externals_dir))
Expand All @@ -55,6 +71,8 @@ def parse_args():
p.add_argument('-v', '--verbose', action='store_true')
p.add_argument('-b', '--binary', action='store_true',
help='Is the dependency in the binary repo?')
p.add_argument('-r', '--release', action='store_true',
help='Download from GitHub release assets instead of branch')
p.add_argument('-O', '--organization',
help='Organization owning the deps repos', default='python')
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
Expand All @@ -67,15 +85,36 @@ def parse_args():

def main():
args = parse_args()
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
final_name = args.externals_dir / args.tag
extracted = extract_zip(args.externals_dir, zip_path)

# Check if the dependency already exists in externals/ directory
# (either already downloaded/extracted, or checked into the git tree)
if final_name.exists():
if args.verbose:
print(f'{args.tag} already exists at {final_name}, skipping download.')
return

# Determine download method: release artifacts for large deps (like LLVM),
# otherwise zip download from GitHub branches
if args.release:
tarball_path = fetch_release(
args.tag,
args.externals_dir / 'tarballs',
org=args.organization,
verbose=args.verbose,
)
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
else:
# Use zip download from GitHub branches
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
extracted = extract_zip(args.externals_dir, zip_path)
for wait in [1, 2, 3, 5, 8, 0]:
try:
extracted.replace(final_name)
Expand Down
8 changes: 6 additions & 2 deletions PCbuild/get_externals.bat
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0

for %%b in (%binaries%) do (
if exist "%EXTERNALS_DIR%\%%b" (
Expand All @@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
) else (
echo.Fetching %%b...
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
if "%%b"=="llvm-20.1.8.0" (
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
) else (
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
)
)
)

Expand Down
72 changes: 58 additions & 14 deletions Python/jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
}

void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);

#include "jit_stencils.h"

// Per-target trampoline configuration.
// TRAMPOLINE_SIZE is the size in bytes of one trampoline slot; a value of 0
// is used on targets that have no trampoline definition here.
// DATA_ALIGN is presumably the byte alignment applied to JIT data -- its use
// is not visible in this hunk, confirm against the allocator code.
#if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
#elif defined(__x86_64__) && defined(__APPLE__)
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
// range.
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
#define DATA_ALIGN 8
#else
#define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1
#endif

// Get the trampoline memory location for a given symbol ordinal.
//
// state->trampolines.mask is a bitmap stored as an array of uint32_t: the bit
// for `ordinal` lives in word (ordinal / 32) at bit position (ordinal % 32).
// The bit for `ordinal` must already be set in the mask (asserted below).
// The returned pointer is the start of a TRAMPOLINE_SIZE-byte slot inside
// state->trampolines.mem.
static unsigned char *
get_trampoline_slot(int ordinal, jit_state *state)
{
    assert(ordinal >= 0);
    // Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior.
    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
    assert(symbol_mask & trampoline_mask);

    // Count the number of set bits in the trampoline mask lower than ordinal;
    // this gives the index into the array of trampolines.
    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
    for (int i = 0; i < ordinal / 32; i++) {
        index += _Py_popcount32(state->trampolines.mask[i]);
    }

    unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
    // The whole slot must lie inside the trampoline region.
    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
    return trampoline;
}

// Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map.
void
Expand All @@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
return;
}

// Masking is done modulo 32 as the mask is stored as an array of uint32_t
const uint32_t symbol_mask = 1 << (ordinal % 32);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);

// Count the number of set bits in the trampoline mask lower than ordinal,
// this gives the index into the array of trampolines.
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}

uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
// Out of range - need a trampoline
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);


/* Generate the trampoline
Expand All @@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
patch_aarch64_26r(location, (uintptr_t)p);
}

// Generate and patch x86_64 trampolines.
//
// The target address is looked up in symbols_map by ordinal. If it can be
// reached from `location` with a signed 32-bit PC-relative displacement, the
// hole is patched to reference it directly. Otherwise a trampoline is written
// into this symbol's slot and the hole is patched to reference the trampoline.
void
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
{
    const uint64_t target = (uintptr_t)symbols_map[ordinal];
    const int64_t displacement = (int64_t)target - 4 - (int64_t)location;

    if (displacement < -(1LL << 31) || displacement >= (1LL << 31)) {
        // Out of range - need a trampoline
        unsigned char *slot = get_trampoline_slot(ordinal, state);

        /* Generate the trampoline (14 bytes, padded to 16):
               0: ff 25 00 00 00 00    jmp *(%rip)
               6: XX XX XX XX XX XX XX XX   (64-bit target address)

           Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
        */
        static const unsigned char jmp_rip[6] = {
            0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
        };
        memcpy(slot, jmp_rip, sizeof(jmp_rip));
        memcpy(slot + sizeof(jmp_rip), &target, sizeof(target));

        // Patch the call site to call the trampoline instead
        patch_32r(location, (uintptr_t)slot - 4);
        return;
    }

    // In range of 32 signed bits: patch directly
    patch_32r(location, target - 4);
}

static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{
Expand Down
20 changes: 10 additions & 10 deletions Tools/jit/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.

The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).

LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
LLVM version 20 is the officially supported version. You can override it if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.

It's easy to install all of the required tools:

### Linux

Install LLVM 19 on Ubuntu/Debian:
Install LLVM 20 on Ubuntu/Debian:

```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 19
sudo ./llvm.sh 20
```

Install LLVM 19 on Fedora Linux 40 or newer:
Install LLVM 20 on Fedora Linux 40 or newer:

```sh
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
```

### macOS

Install LLVM 19 with [Homebrew](https://brew.sh):
Install LLVM 20 with [Homebrew](https://brew.sh):

```sh
brew install llvm@19
brew install llvm@20
```

Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
Expand All @@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri

LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.

Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**

Alternatively, you can use [chocolatey](https://chocolatey.org):

```sh
choco install llvm --version=19.1.0
choco install llvm --version=20.1.8
```

### Dev Containers

If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.

## Building

Expand Down
4 changes: 2 additions & 2 deletions Tools/jit/_llvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import _targets


_LLVM_VERSION = "19"
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
_LLVM_VERSION = "20"
_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"

_P = typing.ParamSpec("_P")
_R = typing.TypeVar("_R")
Expand Down
17 changes: 17 additions & 0 deletions Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,23 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None:
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
# x86_64 Darwin trampolines for external symbols
elif (
hole.kind == "X86_64_RELOC_BRANCH"
and hole.value is HoleValue.ZERO
and hole.symbol not in self.symbols
):
hole.func = "patch_x86_64_trampoline"
hole.need_state = True
assert hole.symbol is not None
if hole.symbol in known_symbols:
ordinal = known_symbols[hole.symbol]
else:
ordinal = len(known_symbols)
known_symbols[hole.symbol] = ordinal
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:
Expand Down
10 changes: 3 additions & 7 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,6 @@ async def _compile(
"-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch:
"-fno-builtin",
# Emit relaxable 64-bit calls/jumps, so we don't have to worry about
# about emitting in-range trampolines for out-of-range targets.
# We can probably remove this and emit trampolines in the future:
"-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
"-std=c11",
Expand Down Expand Up @@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)"
args = ["-fms-runtime-lib=dll", "-fplt"]
args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)"
# -mno-outline-atomics: Keep intrinsics from being emitted.
args = ["-fpic", "-mno-outline-atomics"]
args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
optimizer = _optimizers.OptimizerAArch64
target = _ELF(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"i686-pc-windows-msvc", host):
Expand All @@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)"
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
optimizer = _optimizers.OptimizerX86
target = _ELF(host, condition, args=args, optimizer=optimizer)
else:
Expand Down
Loading