Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Include/cpython/pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ struct _ts {
/* Currently holds the GIL. Must be its own field to avoid data races */
int holds_gil;

/* Currently requesting the GIL */
int gil_requested;

int _whence;

/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).
Expand Down
4 changes: 4 additions & 0 deletions Include/internal/pycore_debug_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
uint64_t native_thread_id;
uint64_t datastack_chunk;
uint64_t status;
uint64_t holds_gil;
uint64_t gil_requested;
} thread_state;

// InterpreterFrame offset;
Expand Down Expand Up @@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
.status = offsetof(PyThreadState, _status), \
.holds_gil = offsetof(PyThreadState, holds_gil), \
.gil_requested = offsetof(PyThreadState, gil_requested), \
}, \
.interpreter_frame = { \
.size = sizeof(_PyInterpreterFrame), \
Expand Down
31 changes: 17 additions & 14 deletions Lib/profiling/sampling/collector.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
from abc import ABC, abstractmethod

# Enums are slow
THREAD_STATE_RUNNING = 0
THREAD_STATE_IDLE = 1
THREAD_STATE_GIL_WAIT = 2
THREAD_STATE_UNKNOWN = 3

STATUS = {
THREAD_STATE_RUNNING: "running",
THREAD_STATE_IDLE: "idle",
THREAD_STATE_GIL_WAIT: "gil_wait",
THREAD_STATE_UNKNOWN: "unknown",
}
# Thread status flags
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
except ImportError:
# Fallback for tests or when module is not available
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)

class Collector(ABC):
@abstractmethod
Expand All @@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
"""Iterate over all frame stacks from all interpreters and threads."""
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
continue
# skip_idle now means: skip if thread is not actively running
# A thread is "active" if it has the GIL OR is on CPU
if skip_idle:
status_flags = thread_info.status
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
if not (has_gil or on_cpu):
continue
frames = thread_info.frame_info
if frames:
yield frames, thread_info.thread_id
237 changes: 220 additions & 17 deletions Lib/profiling/sampling/gecko_collector.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,41 @@
import itertools
import json
import os
import platform
import sys
import threading
import time

from .collector import Collector, THREAD_STATE_RUNNING
from .collector import Collector
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
except ImportError:
# Fallback if module not available (shouldn't happen in normal use)
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)


# Categories matching Firefox Profiler expectations.
# Markers store a category as an integer index into this list, so the order
# here must stay in sync with the CATEGORY_* constants below.
GECKO_CATEGORIES = [
    {"name": "Other", "color": "grey", "subcategories": ["Other"]},
    {"name": "Python", "color": "yellow", "subcategories": ["Other"]},
    {"name": "Native", "color": "blue", "subcategories": ["Other"]},
    {"name": "GC", "color": "orange", "subcategories": ["Other"]},
    {"name": "GIL", "color": "green", "subcategories": ["Other"]},
    {"name": "CPU", "color": "purple", "subcategories": ["Other"]},
    {"name": "Code Type", "color": "red", "subcategories": ["Other"]},
]

# Category indices (positions in GECKO_CATEGORIES).
# Index 3 used to be the "Idle" category; it is now "GC".
CATEGORY_OTHER = 0
CATEGORY_PYTHON = 1
CATEGORY_NATIVE = 2
CATEGORY_GC = 3
CATEGORY_GIL = 4
CATEGORY_CPU = 5
CATEGORY_CODE_TYPE = 6

# Subcategory indices
DEFAULT_SUBCATEGORY = 0
Expand Down Expand Up @@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
self.last_sample_time = 0
self.interval = 1.0 # Will be calculated from actual sampling

# State tracking for interval markers (tid -> start_time)
self.has_gil_start = {} # Thread has the GIL
self.no_gil_start = {} # Thread doesn't have the GIL
self.on_cpu_start = {} # Thread is running on CPU
self.off_cpu_start = {} # Thread is off CPU
self.python_code_start = {} # Thread running Python code (has GIL)
self.native_code_start = {} # Thread running native code (on CPU without GIL)
self.gil_wait_start = {} # Thread waiting for GIL

# GC event tracking: track GC start time per thread
self.gc_start_per_thread = {} # tid -> start_time

# Track which threads have been initialized for state tracking
self.initialized_threads = set()

def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
                            active_name, inactive_name, category, current_time):
    """Track a binary per-thread state and emit a marker when it flips.

    Each of the two states has its own ``tid -> start_time`` dict. When the
    thread is observed in one state while an interval is still open in the
    opposite state, that opposite interval is closed and handed to
    ``_add_marker``.

    Args:
        tid: Thread ID.
        condition: True if the thread is currently in the active state.
        active_dict: Dict tracking the start time of the active state.
        inactive_dict: Dict tracking the start time of the inactive state.
        active_name: Marker name used when an active interval is closed.
        inactive_name: Marker name used when an inactive interval is closed.
        category: Gecko category index for the emitted markers.
        current_time: Timestamp of the current sample.
    """
    # Decide which state the thread is in now and which one it may have left.
    if condition:
        entered, left, left_name = active_dict, inactive_dict, inactive_name
    else:
        entered, left, left_name = inactive_dict, active_dict, active_name

    # On first observation of a thread, just record the current state
    # without creating a marker (we don't know what the previous state was).
    if tid not in self.initialized_threads:
        entered[tid] = current_time
        return

    # Keep the original start time if the state is unchanged since the
    # previous sample; only a genuinely new interval gets current_time.
    entered.setdefault(tid, current_time)

    # An open interval in the opposite state ends at this sample.
    if tid in left:
        self._add_marker(tid, left_name, left.pop(tid), current_time, category)

def collect(self, stack_frames):
"""Collect a sample from stack frames."""
current_time = (time.time() * 1000) - self.start_time
Expand All @@ -69,26 +136,93 @@ def collect(self, stack_frames):
) / self.sample_count
self.last_sample_time = current_time

# Process threads and track GC per thread
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
if (
self.skip_idle
and thread_info.status != THREAD_STATE_RUNNING
):
continue

frames = thread_info.frame_info
if not frames:
continue

tid = thread_info.thread_id
gc_collecting = thread_info.gc_collecting

# Initialize thread if needed
if tid not in self.threads:
self.threads[tid] = self._create_thread(tid)

thread_data = self.threads[tid]

# Decode status flags
status_flags = thread_info.status
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)

# Track GIL possession (Has GIL / No GIL)
self._track_state_transition(
tid, has_gil, self.has_gil_start, self.no_gil_start,
"Has GIL", "No GIL", CATEGORY_GIL, current_time
)

# Track CPU state (On CPU / Off CPU)
self._track_state_transition(
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
"On CPU", "Off CPU", CATEGORY_CPU, current_time
)

# Track code type (Python Code / Native Code)
# This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
if has_gil:
self._track_state_transition(
tid, True, self.python_code_start, self.native_code_start,
"Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
)
elif on_cpu:
self._track_state_transition(
tid, True, self.native_code_start, self.python_code_start,
"Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
)
else:
# Thread is idle (neither has GIL nor on CPU) - close any open code markers
# This handles the third state that _track_state_transition doesn't cover
if tid in self.initialized_threads:
if tid in self.python_code_start:
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
current_time, CATEGORY_CODE_TYPE)
if tid in self.native_code_start:
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
current_time, CATEGORY_CODE_TYPE)

# Track "Waiting for GIL" intervals (one-sided tracking)
if gil_requested:
self.gil_wait_start.setdefault(tid, current_time)
elif tid in self.gil_wait_start:
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
current_time, CATEGORY_GIL)

# Track GC events - attribute to all threads that hold the GIL during GC
# (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
# If GIL switches during GC, multiple threads will get GC markers
if gc_collecting and has_gil:
# Start GC marker if not already started for this thread
if tid not in self.gc_start_per_thread:
self.gc_start_per_thread[tid] = current_time
elif tid in self.gc_start_per_thread:
# End GC marker if it was running for this thread
# (either GC finished or thread lost GIL)
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
current_time, CATEGORY_GC)

# Mark thread as initialized after processing all state transitions
self.initialized_threads.add(tid)

# Categorize: idle if neither has GIL nor on CPU
is_idle = not has_gil and not on_cpu

# Skip idle threads if skip_idle is enabled
if self.skip_idle and is_idle:
continue

if not frames:
continue

# Process the stack
stack_index = self._process_stack(thread_data, frames)

Expand All @@ -102,7 +236,6 @@ def collect(self, stack_frames):

def _create_thread(self, tid):
"""Create a new thread structure with processed profile format."""
import threading

# Determine if this is the main thread
try:
Expand Down Expand Up @@ -181,7 +314,7 @@ def _create_thread(self, tid):
"functionSize": [],
"length": 0,
},
# Markers - processed format
# Markers - processed format (arrays)
"markers": {
"data": [],
"name": [],
Expand Down Expand Up @@ -215,6 +348,27 @@ def _intern_string(self, s):
self.global_string_map[s] = idx
return idx

def _add_marker(self, tid, name, start_time, end_time, category):
    """Append one interval marker to a thread's marker table.

    Args:
        tid: Thread ID. If it is not a key of ``self.threads`` the call
            does nothing.
        name: Marker name. It is interned with ``_intern_string`` for the
            ``name`` column and, with spaces removed, reused as the
            payload ``type``.
        start_time: Timestamp at which the interval began.
        end_time: Timestamp at which the interval ended.
        category: Category index stored with the marker.
    """
    if tid not in self.threads:
        # No thread record to attach the marker to; skip before interning
        # so unknown threads do not grow the string table.
        return

    markers = self.threads[tid]["markers"]

    # The marker table is column-oriented: append one value per column so
    # all columns stay the same length.
    markers["name"].append(self._intern_string(name))
    markers["startTime"].append(start_time)
    markers["endTime"].append(end_time)
    markers["phase"].append(1)  # 1 = interval marker
    markers["category"].append(category)
    markers["data"].append({
        "type": name.replace(" ", ""),
        "duration": end_time - start_time,
        "tid": tid,
    })

def _process_stack(self, thread_data, frames):
"""Process a stack and return the stack index."""
if not frames:
Expand Down Expand Up @@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
frame_cache[frame_key] = frame_idx
return frame_idx

def _finalize_markers(self):
    """Close every still-open interval marker at the end of profiling.

    Each open interval in the per-thread state dicts is emitted through
    ``_add_marker`` with ``self.last_sample_time`` as its end time, and the
    dicts are emptied so a second call emits nothing further.
    """
    end_time = self.last_sample_time

    # (open-interval dict, marker name, category) for every tracked state.
    marker_states = [
        (self.has_gil_start, "Has GIL", CATEGORY_GIL),
        (self.no_gil_start, "No GIL", CATEGORY_GIL),
        (self.on_cpu_start, "On CPU", CATEGORY_CPU),
        (self.off_cpu_start, "Off CPU", CATEGORY_CPU),
        (self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
        (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
        (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
        (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
    ]

    for state_dict, marker_name, category in marker_states:
        # _add_marker does not touch the state dicts, so it is safe to
        # iterate directly and drop all entries in one step afterwards.
        for tid, start_time in state_dict.items():
            self._add_marker(tid, marker_name, start_time, end_time, category)
        state_dict.clear()

def export(self, filename):
"""Export the profile to a Gecko JSON file."""

if self.sample_count > 0 and self.last_sample_time > 0:
self.interval = self.last_sample_time / self.sample_count

profile = self._build_profile()
# Spinner for progress indication
spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'])
stop_spinner = threading.Event()

def spin():
message = 'Building Gecko profile...'
while not stop_spinner.is_set():
sys.stderr.write(f'\r{next(spinner)} {message}')
sys.stderr.flush()
time.sleep(0.1)
# Clear the spinner line
sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
sys.stderr.flush()

spinner_thread = threading.Thread(target=spin, daemon=True)
spinner_thread.start()

try:
# Finalize any open markers before building profile
self._finalize_markers()

profile = self._build_profile()

with open(filename, "w") as f:
json.dump(profile, f, separators=(",", ":"))
with open(filename, "w") as f:
json.dump(profile, f, separators=(",", ":"))
finally:
stop_spinner.set()
spinner_thread.join(timeout=1.0)
# Small delay to ensure the clear happens
time.sleep(0.01)

print(f"Gecko profile written to {filename}")
print(
Expand All @@ -416,6 +618,7 @@ def _build_profile(self):
frame_table["length"] = len(frame_table["func"])
func_table["length"] = len(func_table["name"])
resource_table["length"] = len(resource_table["name"])
thread_data["markers"]["length"] = len(thread_data["markers"]["name"])

# Clean up internal caches
del thread_data["_stackCache"]
Expand Down
Loading
Loading