Skip to content

Commit 7315953

Browse files
committed
Good shit
1 parent 2beed97 commit 7315953

File tree

2 files changed

+352
-44
lines changed

2 files changed

+352
-44
lines changed

Lib/profiling/sampling/collector.py

Lines changed: 42 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,16 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
3636
yield frames, thread_info.thread_id
3737

3838
def _iter_async_frames(self, awaited_info_list):
    """Yield linear stacks for every leaf task in the awaited-by graph.

    The work is split into three phases, each delegated to a helper
    method on this object.  Items are produced by
    ``_build_linear_stacks`` and passed through unchanged; each one is a
    ``(frames, thread_id, leaf_id)`` tuple.

    Args:
        awaited_info_list: Task information handed straight to
            ``_build_task_graph``; its structure is not inspected here.
    """
    # Phase 1: Index tasks and build parent relationships
    task_map, child_to_parents, all_task_ids = self._build_task_graph(
        awaited_info_list
    )

    # Phase 2: Find leaf tasks (tasks that never appear in another
    # task's ``awaited_by`` list, i.e. are never recorded as a parent)
    leaf_task_ids = self._find_leaf_tasks(child_to_parents, all_task_ids)

    # Phase 3: Build linear stacks via BFS from each leaf to root
    yield from self._build_linear_stacks(
        leaf_task_ids, task_map, child_to_parents
    )
47+
48+
def _build_task_graph(self, awaited_info_list):
4149
task_map = {}
4250
child_to_parents = {}
4351
all_task_ids = set()
@@ -48,70 +56,60 @@ def _iter_async_frames(self, awaited_info_list):
4856
task_id = task_info.task_id
4957
task_map[task_id] = (task_info, thread_id)
5058
all_task_ids.add(task_id)
59+
60+
# Store parent task IDs (not frames - those are in task_info.coroutine_stack)
5161
if task_info.awaited_by:
52-
# Store all parent coroutines, not just [0]
53-
child_to_parents[task_id] = task_info.awaited_by
62+
child_to_parents[task_id] = [p.task_name for p in task_info.awaited_by]
63+
64+
return task_map, child_to_parents, all_task_ids
5465

55-
# Identify leaf tasks (O(n))
56-
# Collect all parent task IDs from all coroutines
66+
def _find_leaf_tasks(self, child_to_parents, all_task_ids):
5767
all_parent_ids = set()
58-
for parent_coros in child_to_parents.values():
59-
for parent_coro in parent_coros:
60-
all_parent_ids.add(parent_coro.task_name)
61-
leaf_task_ids = all_task_ids - all_parent_ids
68+
for parent_ids in child_to_parents.values():
69+
all_parent_ids.update(parent_ids)
70+
return all_task_ids - all_parent_ids
6271

63-
# Build linear stacks for each leaf (O(n × depth × num_paths))
64-
# For tasks with multiple parents, we generate one stack per parent path
72+
def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parents):
6573
for leaf_id in leaf_task_ids:
66-
# Use BFS to explore all paths from leaf to root
67-
# Queue items: (current_task_id, frames_accumulated)
68-
queue = [(leaf_id, [])]
69-
visited = set()
74+
# BFS queue: (current_task_id, frames_so_far, path_for_cycle_detection)
75+
queue = [(leaf_id, [], frozenset())]
7076

7177
while queue:
72-
current_id, frames = queue.pop(0)
78+
current_id, frames, path = queue.pop(0)
7379

74-
# Avoid processing the same task twice in this path
75-
if current_id in visited:
80+
# Cycle detection
81+
if current_id in path:
7682
continue
77-
visited.add(current_id)
7883

84+
# End of path (parent ID not in task_map)
7985
if current_id not in task_map:
80-
# Reached end of path - yield if we have frames
8186
if frames:
8287
_, thread_id = task_map[leaf_id]
8388
yield frames, thread_id, leaf_id
8489
continue
8590

91+
# Process current task
8692
task_info, tid = task_map[current_id]
87-
88-
# Add this task's frames
8993
new_frames = list(frames)
94+
new_path = path | {current_id}
95+
96+
# Add all frames from all coroutines in this task
9097
if task_info.coroutine_stack:
91-
for frame in task_info.coroutine_stack[0].call_stack:
92-
new_frames.append(frame)
98+
for coro_info in task_info.coroutine_stack:
99+
for frame in coro_info.call_stack:
100+
new_frames.append(frame)
93101

94102
# Add task boundary marker
95103
task_name = task_info.task_name or "Task-" + str(task_info.task_id)
96104
new_frames.append(FrameInfo(("<task>", 0, task_name)))
97105

98-
# Get parent coroutines
99-
parent_coros = child_to_parents.get(current_id)
100-
if not parent_coros:
101-
# No parents - this is the root, yield the complete stack
102-
yield new_frames, tid, leaf_id
103-
continue
104-
105-
# For each parent coroutine, add its await frames and continue to parent task
106-
for parent_coro in parent_coros:
107-
parent_task_id = parent_coro.task_name
108-
109-
# Add the parent's await-site frames (where parent awaits this task)
110-
path_frames = list(new_frames)
111-
for frame in parent_coro.call_stack:
112-
path_frames.append(frame)
106+
# Get parent task IDs
107+
parent_ids = child_to_parents.get(current_id, [])
113108

114-
# Continue BFS with parent task
115-
# Note: parent_coro.call_stack contains the frames from the parent task,
116-
# so we should NOT add parent task's coroutine_stack again
117-
queue.append((parent_task_id, path_frames))
109+
if not parent_ids:
110+
# Root task - yield complete stack
111+
yield new_frames, tid, leaf_id
112+
else:
113+
# Continue to each parent (creates multiple paths if >1 parent)
114+
for parent_id in parent_ids:
115+
queue.append((parent_id, new_frames, new_path))

0 commit comments

Comments
 (0)