Skip to content

Commit 1d0454a

Browse files
lkollar authored and pablogsal committed
Reduce memory usage of stack collectors
The stack collector base class keeps all frames until export() is called, which causes significant unnecessary memory usage. Instead, we can process the frames on the fly in the collect call by dispatching the aggregation logic to the subclass through the process_frames method.
1 parent efc08c5 commit 1d0454a

File tree

2 files changed

+123
-158
lines changed

2 files changed

+123
-158
lines changed

Lib/profiling/sampling/stack_collector.py

Lines changed: 75 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -10,48 +10,46 @@
1010

1111

1212
class StackTraceCollector(Collector):
13-
def __init__(self):
14-
self.call_trees = []
15-
self.function_samples = collections.defaultdict(int)
16-
17-
def _process_frames(self, frames):
18-
"""Process a single thread's frame stack."""
19-
if not frames:
20-
return
21-
22-
# Store the complete call stack (reverse order - root first)
23-
call_tree = list(reversed(frames))
24-
self.call_trees.append(call_tree)
25-
26-
# Count samples per function
27-
for frame in frames:
28-
self.function_samples[frame] += 1
29-
3013
def collect(self, stack_frames):
3114
for frames in self._iter_all_frames(stack_frames):
32-
self._process_frames(frames)
15+
if not frames:
16+
continue
17+
self.process_frames(frames)
18+
19+
def process_frames(self, frames):
20+
pass
3321

3422

3523
class CollapsedStackCollector(StackTraceCollector):
24+
def __init__(self):
25+
self.stack_counter = collections.Counter()
26+
27+
def process_frames(self, frames):
28+
call_tree = tuple(reversed(frames))
29+
self.stack_counter[call_tree] += 1
30+
3631
def export(self, filename):
37-
stack_counter = collections.Counter()
38-
for call_tree in self.call_trees:
39-
# Call tree is already in root->leaf order
32+
lines = []
33+
for call_tree, count in self.stack_counter.items():
4034
stack_str = ";".join(
4135
f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree
4236
)
43-
stack_counter[stack_str] += 1
37+
lines.append((stack_str, count))
38+
39+
lines.sort(key=lambda x: (-x[1], x[0]))
4440

4541
with open(filename, "w") as f:
46-
for stack, count in stack_counter.items():
42+
for stack, count in lines:
4743
f.write(f"{stack} {count}\n")
4844
print(f"Collapsed stack output written to {filename}")
4945

5046

5147
class FlamegraphCollector(StackTraceCollector):
5248
def __init__(self):
53-
super().__init__()
5449
self.stats = {}
50+
self._root = {"samples": 0, "children": {}}
51+
self._total_samples = 0
52+
self._func_intern = {}
5553

5654
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None):
5755
"""Set profiling statistics to include in flamegraph data."""
@@ -98,105 +96,78 @@ def _format_function_name(func):
9896
return f"{funcname} ({filename}:{lineno})"
9997

10098
def _convert_to_flamegraph_format(self):
101-
"""Convert call trees to d3-flamegraph format with optimized hierarchy building"""
102-
if not self.call_trees:
99+
"""Convert aggregated trie to d3-flamegraph format."""
100+
if self._total_samples == 0:
103101
return {"name": "No Data", "value": 0, "children": []}
104102

105-
unique_functions = set()
106-
for call_tree in self.call_trees:
107-
unique_functions.update(call_tree)
108-
109-
func_to_name = {
110-
func: self._format_function_name(func) for func in unique_functions
111-
}
112-
113-
root = {"name": "root", "children": {}, "samples": 0}
114-
115-
for call_tree in self.call_trees:
116-
current_node = root
117-
current_node["samples"] += 1
118-
119-
for func in call_tree:
120-
func_name = func_to_name[func] # Use pre-computed name
121-
122-
if func_name not in current_node["children"]:
123-
current_node["children"][func_name] = {
124-
"name": func_name,
125-
"func": func,
126-
"children": {},
127-
"samples": 0,
128-
"filename": func[0],
129-
"lineno": func[1],
130-
"funcname": func[2],
131-
}
132-
133-
current_node = current_node["children"][func_name]
134-
current_node["samples"] += 1
135-
136-
def convert_node(node, min_samples=1):
137-
if node["samples"] < min_samples:
138-
return None
139-
140-
source_code = None
141-
if "func" in node:
142-
source_code = self._get_source_lines(node["func"])
143-
144-
result = {
145-
"name": node["name"],
146-
"value": node["samples"],
147-
"children": [],
148-
}
149-
150-
if "filename" in node:
151-
result.update(
152-
{
153-
"filename": node["filename"],
154-
"lineno": node["lineno"],
155-
"funcname": node["funcname"],
156-
}
103+
def convert_children(children, min_samples):
104+
out = []
105+
for func, node in children.items():
106+
samples = node["samples"]
107+
if samples < min_samples:
108+
continue
109+
110+
name = self._format_function_name(func)
111+
child_entry = {
112+
"name": name,
113+
"value": samples,
114+
"children": [],
115+
"filename": func[0],
116+
"lineno": func[1],
117+
"funcname": func[2],
118+
}
119+
120+
source = self._get_source_lines(func)
121+
if source:
122+
child_entry["source"] = source
123+
124+
# Recurse
125+
child_entry["children"] = convert_children(
126+
node["children"], min_samples
157127
)
128+
out.append(child_entry)
158129

159-
if source_code:
160-
result["source"] = source_code
161-
162-
# Recursively convert children
163-
child_nodes = []
164-
for child_name, child_node in node["children"].items():
165-
child_result = convert_node(child_node, min_samples)
166-
if child_result:
167-
child_nodes.append(child_result)
168-
169-
# Sort children by sample count (descending)
170-
child_nodes.sort(key=lambda x: x["value"], reverse=True)
171-
result["children"] = child_nodes
172-
173-
return result
130+
out.sort(key=lambda x: (-x["value"], x["name"]))
131+
return out
174132

175133
# Filter out very small functions (less than 0.1% of total samples)
176-
total_samples = len(self.call_trees)
134+
total_samples = self._total_samples
177135
min_samples = max(1, int(total_samples * 0.001))
178136

179-
converted_root = convert_node(root, min_samples)
180-
181-
if not converted_root or not converted_root["children"]:
137+
root_children = convert_children(self._root["children"], min_samples)
138+
if not root_children:
182139
return {"name": "No significant data", "value": 0, "children": []}
183140

184141
# If we only have one root child, make it the root to avoid redundant level
185-
if len(converted_root["children"]) == 1:
186-
main_child = converted_root["children"][0]
142+
if len(root_children) == 1:
143+
main_child = root_children[0]
187144
main_child["name"] = f"Program Root: {main_child['name']}"
188145
main_child["stats"] = self.stats
189146
return main_child
190147

191-
converted_root["name"] = "Program Root"
192-
converted_root["stats"] = self.stats
193-
return converted_root
148+
return {"name": "Program Root", "value": total_samples, "children": root_children, "stats": self.stats}
149+
150+
def process_frames(self, frames):
151+
# Reverse to root->leaf
152+
call_tree = reversed(frames)
153+
self._root["samples"] += 1
154+
self._total_samples += 1
155+
156+
current = self._root
157+
for func in call_tree:
158+
func = self._func_intern.setdefault(func, func)
159+
children = current["children"]
160+
node = children.get(func)
161+
if node is None:
162+
node = {"samples": 0, "children": {}}
163+
children[func] = node
164+
node["samples"] += 1
165+
current = node
194166

195167
def _get_source_lines(self, func):
196-
filename, lineno, funcname = func
168+
filename, lineno, _ = func
197169

198170
try:
199-
# Get several lines around the function definition
200171
lines = []
201172
start_line = max(1, lineno - 2)
202173
end_line = lineno + 3
@@ -210,7 +181,6 @@ def _get_source_lines(self, func):
210181
return lines if lines else None
211182

212183
except Exception:
213-
# If we can't get source code, return None
214184
return None
215185

216186
def _create_flamegraph_html(self, data):

Lib/test/test_profiling/test_sampling_profiler.py

Lines changed: 48 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -272,25 +272,26 @@ def test_collapsed_stack_collector_with_empty_and_deep_stacks(self):
272272

273273
# Test with empty frames
274274
collector.collect([])
275-
self.assertEqual(len(collector.call_trees), 0)
275+
self.assertEqual(len(collector.stack_counter), 0)
276276

277277
# Test with single frame stack
278278
test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func")])])]
279279
collector.collect(test_frames)
280-
self.assertEqual(len(collector.call_trees), 1)
281-
self.assertEqual(collector.call_trees[0], [("file.py", 10, "func")])
280+
self.assertEqual(len(collector.stack_counter), 1)
281+
((path,), count), = collector.stack_counter.items()
282+
self.assertEqual(path, ("file.py", 10, "func"))
283+
self.assertEqual(count, 1)
282284

283285
# Test with very deep stack
284286
deep_stack = [(f"file{i}.py", i, f"func{i}") for i in range(100)]
285287
test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, deep_stack)])]
286288
collector = CollapsedStackCollector()
287289
collector.collect(test_frames)
288-
self.assertEqual(len(collector.call_trees[0]), 100)
289-
# Check it's properly reversed
290-
self.assertEqual(
291-
collector.call_trees[0][0], ("file99.py", 99, "func99")
292-
)
293-
self.assertEqual(collector.call_trees[0][-1], ("file0.py", 0, "func0"))
290+
# One aggregated path with 100 frames (reversed)
291+
(path_tuple,), = (collector.stack_counter.keys(),)
292+
self.assertEqual(len(path_tuple), 100)
293+
self.assertEqual(path_tuple[0], ("file99.py", 99, "func99"))
294+
self.assertEqual(path_tuple[-1], ("file0.py", 0, "func0"))
294295

295296
def test_pstats_collector_basic(self):
296297
"""Test basic PstatsCollector functionality."""
@@ -382,27 +383,20 @@ def test_collapsed_stack_collector_basic(self):
382383
collector = CollapsedStackCollector()
383384

384385
# Test empty state
385-
self.assertEqual(len(collector.call_trees), 0)
386-
self.assertEqual(len(collector.function_samples), 0)
386+
self.assertEqual(len(collector.stack_counter), 0)
387387

388388
# Test collecting sample data
389389
test_frames = [
390390
MockInterpreterInfo(0, [MockThreadInfo(1, [("file.py", 10, "func1"), ("file.py", 20, "func2")])])
391391
]
392392
collector.collect(test_frames)
393393

394-
# Should store call tree (reversed)
395-
self.assertEqual(len(collector.call_trees), 1)
396-
expected_tree = [("file.py", 20, "func2"), ("file.py", 10, "func1")]
397-
self.assertEqual(collector.call_trees[0], expected_tree)
398-
399-
# Should count function samples
400-
self.assertEqual(
401-
collector.function_samples[("file.py", 10, "func1")], 1
402-
)
403-
self.assertEqual(
404-
collector.function_samples[("file.py", 20, "func2")], 1
405-
)
394+
# Should store one reversed path
395+
self.assertEqual(len(collector.stack_counter), 1)
396+
(path, count), = collector.stack_counter.items()
397+
expected_tree = (("file.py", 20, "func2"), ("file.py", 10, "func1"))
398+
self.assertEqual(path, expected_tree)
399+
self.assertEqual(count, 1)
406400

407401
def test_collapsed_stack_collector_export(self):
408402
collapsed_out = tempfile.NamedTemporaryFile(delete=False)
@@ -441,9 +435,9 @@ def test_flamegraph_collector_basic(self):
441435
"""Test basic FlamegraphCollector functionality."""
442436
collector = FlamegraphCollector()
443437

444-
# Test empty state (inherits from StackTraceCollector)
445-
self.assertEqual(len(collector.call_trees), 0)
446-
self.assertEqual(len(collector.function_samples), 0)
438+
# Empty collector should produce 'No Data'
439+
data = collector._convert_to_flamegraph_format()
440+
self.assertIn(data["name"], ("No Data", "No significant data"))
447441

448442
# Test collecting sample data
449443
test_frames = [
@@ -454,18 +448,18 @@ def test_flamegraph_collector_basic(self):
454448
]
455449
collector.collect(test_frames)
456450

457-
# Should store call tree (reversed)
458-
self.assertEqual(len(collector.call_trees), 1)
459-
expected_tree = [("file.py", 20, "func2"), ("file.py", 10, "func1")]
460-
self.assertEqual(collector.call_trees[0], expected_tree)
461-
462-
# Should count function samples
463-
self.assertEqual(
464-
collector.function_samples[("file.py", 10, "func1")], 1
465-
)
466-
self.assertEqual(
467-
collector.function_samples[("file.py", 20, "func2")], 1
468-
)
451+
# Convert and verify structure: func2 -> func1 with counts = 1
452+
data = collector._convert_to_flamegraph_format()
453+
# Expect promotion: root is the single child (func2), with func1 as its only child
454+
name = data.get("name", "")
455+
self.assertIsInstance(name, str)
456+
self.assertTrue(name.startswith("Program Root: "))
457+
self.assertIn("func2 (file.py:20)", name) # formatted name
458+
children = data.get("children", [])
459+
self.assertEqual(len(children), 1)
460+
child = children[0]
461+
self.assertIn("func1 (file.py:10)", child["name"]) # formatted name
462+
self.assertEqual(child["value"], 1)
469463

470464
def test_flamegraph_collector_export(self):
471465
"""Test flamegraph HTML export functionality."""
@@ -1508,28 +1502,29 @@ def test_collapsed_stack_with_recursion(self):
15081502
for frames in recursive_frames:
15091503
collector.collect([frames])
15101504

1511-
# Should capture both call trees
1512-
self.assertEqual(len(collector.call_trees), 2)
1513-
1514-
# First tree should be longer (deeper recursion)
1515-
tree1 = collector.call_trees[0]
1516-
tree2 = collector.call_trees[1]
1505+
# Should capture both call paths
1506+
self.assertEqual(len(collector.stack_counter), 2)
15171507

1518-
# Trees should be different lengths due to different recursion depths
1519-
self.assertNotEqual(len(tree1), len(tree2))
1508+
# First path should be longer (deeper recursion) than the second
1509+
paths = list(collector.stack_counter.keys())
1510+
lengths = [len(p) for p in paths]
1511+
self.assertNotEqual(lengths[0], lengths[1])
15201512

15211513
# Both should contain factorial calls
1522-
self.assertTrue(any("factorial" in str(frame) for frame in tree1))
1523-
self.assertTrue(any("factorial" in str(frame) for frame in tree2))
1514+
self.assertTrue(any(any(f[2] == "factorial" for f in p) for p in paths))
15241515

1525-
# Function samples should count all occurrences
1516+
# Verify total occurrences via aggregation
15261517
factorial_key = ("factorial.py", 10, "factorial")
15271518
main_key = ("main.py", 5, "main")
15281519

1529-
# factorial appears 5 times total (3 + 2)
1530-
self.assertEqual(collector.function_samples[factorial_key], 5)
1531-
# main appears 2 times total
1532-
self.assertEqual(collector.function_samples[main_key], 2)
1520+
def total_occurrences(func):
1521+
total = 0
1522+
for path, count in collector.stack_counter.items():
1523+
total += sum(1 for f in path if f == func) * count
1524+
return total
1525+
1526+
self.assertEqual(total_occurrences(factorial_key), 5)
1527+
self.assertEqual(total_occurrences(main_key), 2)
15331528

15341529

15351530
@requires_subprocess()

0 commit comments

Comments
 (0)