Skip to content

Commit 52c9867

Browse files
committed
Implement string table
1 parent 1d0454a commit 52c9867

File tree

5 files changed

+181
-28
lines changed

5 files changed

+181
-28
lines changed

Lib/profiling/sampling/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@
77
from .collector import Collector
88
from .pstats_collector import PstatsCollector
99
from .stack_collector import CollapsedStackCollector
10+
from .string_table import StringTable
1011

11-
__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector")
12+
__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "StringTable")

Lib/profiling/sampling/flamegraph.js

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,50 @@
11
const EMBEDDED_DATA = {{FLAMEGRAPH_DATA}};
22

3+
// Global string table for resolving string indices.
// Populated once at startup from the embedded flamegraph data's `strings` array.
let stringTable = [];

/**
 * Resolve a string-table index to the string it refers to.
 *
 * @param {number|string|null|undefined} index - An index into `stringTable`,
 *   an already-resolved string, or a missing field.
 * @returns {string} The table entry for a valid integer index; an empty
 *   string for `null`/`undefined`; otherwise the value coerced with `String()`.
 */
function resolveString(index) {
  // A missing field must stay falsy: callers rely on `resolveString(x) || fallback`
  // and on truthiness checks, which the strings "undefined"/"null" would defeat.
  if (index === null || index === undefined) {
    return "";
  }
  // Only integers are valid indices; a fractional number inside the range would
  // otherwise read `undefined` out of the array.
  if (Number.isInteger(index) && index >= 0 && index < stringTable.length) {
    return stringTable[index];
  }
  // Fallback for non-indexed (already resolved) strings or invalid indices
  return String(index);
}
14+
15+
/**
 * Build a copy of a flamegraph node tree in which string-table indices are
 * replaced by the strings they refer to.
 *
 * Numeric `name`, `filename` and `funcname` fields and numeric entries of a
 * `source` array are passed through `resolveString`; `children` are processed
 * recursively. The input node is not modified.
 *
 * @param {object|null|undefined} node - Flamegraph node to resolve.
 * @returns {object|null|undefined} A shallow copy with resolved fields, or the
 *   input itself when it is falsy.
 */
function resolveStringIndices(node) {
  if (!node) {
    return node;
  }

  // Work on a shallow copy so the caller's object stays untouched.
  const copy = { ...node };

  // Scalar string fields: only numeric values are treated as indices.
  for (const field of ["name", "filename", "funcname"]) {
    if (typeof copy[field] === "number") {
      copy[field] = resolveString(copy[field]);
    }
  }

  // Source lines, when present, may each be an index or a literal value.
  if (Array.isArray(copy.source)) {
    copy.source = copy.source.map((entry) => {
      if (typeof entry === "number") {
        return resolveString(entry);
      }
      return entry;
    });
  }

  // Descend into the subtree.
  if (Array.isArray(copy.children)) {
    copy.children = copy.children.map((kid) => resolveStringIndices(kid));
  }

  return copy;
}
47+
348
// Python color palette - cold to hot
449
const pythonColors = [
550
"#fff4bf", // Coldest - light yellow (<1%)
@@ -100,6 +145,10 @@ function createPythonTooltip(data) {
100145
</div>`;
101146
}
102147

148+
// Resolve strings for display
149+
const funcname = resolveString(d.data.funcname) || resolveString(d.data.name);
150+
const filename = resolveString(d.data.filename) || "";
151+
103152
const tooltipHTML = `
104153
<div>
105154
<div style="color: #3776ab; font-weight: 600; font-size: 16px;
@@ -257,9 +306,9 @@ function updateSearchHighlight(searchTerm, searchInput) {
257306
let matchCount = 0;
258307
d3.selectAll("#chart rect").each(function (d) {
259308
if (d && d.data) {
260-
const name = d.data.name || "";
261-
const funcname = d.data.funcname || "";
262-
const filename = d.data.filename || "";
309+
const name = resolveString(d.data.name) || "";
310+
const funcname = resolveString(d.data.funcname) || "";
311+
const filename = resolveString(d.data.filename) || "";
263312
const term = searchTerm.toLowerCase();
264313
const matches =
265314
name.toLowerCase().includes(term) ||
@@ -317,12 +366,20 @@ function handleResize(chart, data) {
317366

318367
function initFlamegraph() {
319368
ensureLibraryLoaded();
320-
const tooltip = createPythonTooltip(EMBEDDED_DATA);
321-
const chart = createFlamegraph(tooltip, EMBEDDED_DATA.value);
322-
renderFlamegraph(chart, EMBEDDED_DATA);
369+
370+
// Extract string table if present and resolve string indices
371+
let processedData = EMBEDDED_DATA;
372+
if (EMBEDDED_DATA.strings) {
373+
stringTable = EMBEDDED_DATA.strings;
374+
processedData = resolveStringIndices(EMBEDDED_DATA);
375+
}
376+
377+
const tooltip = createPythonTooltip(processedData);
378+
const chart = createFlamegraph(tooltip, processedData.value);
379+
renderFlamegraph(chart, processedData);
323380
attachPanelControls();
324381
initSearchHandlers();
325-
handleResize(chart, EMBEDDED_DATA);
382+
handleResize(chart, processedData);
326383
}
327384

328385
if (document.readyState === "loading") {
@@ -338,7 +395,10 @@ function populateStats(data) {
338395
const functionMap = new Map();
339396

340397
function collectFunctions(node) {
341-
if (node.filename && node.funcname) {
398+
const filename = resolveString(node.filename);
399+
const funcname = resolveString(node.funcname);
400+
401+
if (filename && funcname) {
342402
// Calculate direct samples (this node's value minus children's values)
343403
let childrenValue = 0;
344404
if (node.children) {
@@ -347,23 +407,23 @@ function populateStats(data) {
347407
const directSamples = Math.max(0, node.value - childrenValue);
348408

349409
// Use file:line:funcname as key to ensure uniqueness
350-
const funcKey = `${node.filename}:${node.lineno || '?'}:${node.funcname}`;
410+
const funcKey = `${filename}:${node.lineno || '?'}:${funcname}`;
351411

352412
if (functionMap.has(funcKey)) {
353413
const existing = functionMap.get(funcKey);
354414
existing.directSamples += directSamples;
355415
existing.directPercent = (existing.directSamples / totalSamples) * 100;
356416
// Keep the most representative file/line (the one with more samples)
357417
if (directSamples > existing.maxSingleSamples) {
358-
existing.filename = node.filename;
418+
existing.filename = filename;
359419
existing.lineno = node.lineno || '?';
360420
existing.maxSingleSamples = directSamples;
361421
}
362422
} else {
363423
functionMap.set(funcKey, {
364-
filename: node.filename,
424+
filename: filename,
365425
lineno: node.lineno || '?',
366-
funcname: node.funcname,
426+
funcname: funcname,
367427
directSamples,
368428
directPercent: (directSamples / totalSamples) * 100,
369429
maxSingleSamples: directSamples

Lib/profiling/sampling/stack_collector.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88

99
from .collector import Collector
10+
from .string_table import StringTable
1011

1112

1213
class StackTraceCollector(Collector):
@@ -50,6 +51,7 @@ def __init__(self):
5051
self._root = {"samples": 0, "children": {}}
5152
self._total_samples = 0
5253
self._func_intern = {}
54+
self._string_table = StringTable()
5355

5456
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None):
5557
"""Set profiling statistics to include in flamegraph data."""
@@ -63,11 +65,13 @@ def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=
6365
def export(self, filename):
6466
flamegraph_data = self._convert_to_flamegraph_format()
6567

66-
# Debug output
68+
# Debug output with string table statistics
6769
num_functions = len(flamegraph_data.get("children", []))
6870
total_time = flamegraph_data.get("value", 0)
71+
string_count = len(self._string_table)
6972
print(
70-
f"Flamegraph data: {num_functions} root functions, total samples: {total_time}"
73+
f"Flamegraph data: {num_functions} root functions, total samples: {total_time}, "
74+
f"{string_count} unique strings"
7175
)
7276

7377
if num_functions == 0:
@@ -96,9 +100,14 @@ def _format_function_name(func):
96100
return f"{funcname} ({filename}:{lineno})"
97101

98102
def _convert_to_flamegraph_format(self):
99-
"""Convert aggregated trie to d3-flamegraph format."""
103+
"""Convert aggregated trie to d3-flamegraph format with string table optimization."""
100104
if self._total_samples == 0:
101-
return {"name": "No Data", "value": 0, "children": []}
105+
return {
106+
"name": self._string_table.intern("No Data"),
107+
"value": 0,
108+
"children": [],
109+
"strings": self._string_table.get_strings()
110+
}
102111

103112
def convert_children(children, min_samples):
104113
out = []
@@ -107,26 +116,33 @@ def convert_children(children, min_samples):
107116
if samples < min_samples:
108117
continue
109118

110-
name = self._format_function_name(func)
119+
# Intern all string components for maximum efficiency
120+
filename_idx = self._string_table.intern(func[0])
121+
funcname_idx = self._string_table.intern(func[2])
122+
name_idx = self._string_table.intern(self._format_function_name(func))
123+
111124
child_entry = {
112-
"name": name,
125+
"name": name_idx,
113126
"value": samples,
114127
"children": [],
115-
"filename": func[0],
128+
"filename": filename_idx,
116129
"lineno": func[1],
117-
"funcname": func[2],
130+
"funcname": funcname_idx,
118131
}
119132

120133
source = self._get_source_lines(func)
121134
if source:
122-
child_entry["source"] = source
135+
# Intern source lines for memory efficiency
136+
source_indices = [self._string_table.intern(line) for line in source]
137+
child_entry["source"] = source_indices
123138

124139
# Recurse
125140
child_entry["children"] = convert_children(
126141
node["children"], min_samples
127142
)
128143
out.append(child_entry)
129144

145+
# Sort by value (descending) then by name index for consistent ordering
130146
out.sort(key=lambda x: (-x["value"], x["name"]))
131147
return out
132148

@@ -136,16 +152,31 @@ def convert_children(children, min_samples):
136152

137153
root_children = convert_children(self._root["children"], min_samples)
138154
if not root_children:
139-
return {"name": "No significant data", "value": 0, "children": []}
155+
return {
156+
"name": self._string_table.intern("No significant data"),
157+
"value": 0,
158+
"children": [],
159+
"strings": self._string_table.get_strings()
160+
}
140161

141162
# If we only have one root child, make it the root to avoid redundant level
142163
if len(root_children) == 1:
143164
main_child = root_children[0]
144-
main_child["name"] = f"Program Root: {main_child['name']}"
165+
# Update the name to indicate it's the program root
166+
old_name = self._string_table.get_string(main_child["name"])
167+
new_name = f"Program Root: {old_name}"
168+
main_child["name"] = self._string_table.intern(new_name)
145169
main_child["stats"] = self.stats
170+
main_child["strings"] = self._string_table.get_strings()
146171
return main_child
147172

148-
return {"name": "Program Root", "value": total_samples, "children": root_children, "stats": self.stats}
173+
return {
174+
"name": self._string_table.intern("Program Root"),
175+
"value": total_samples,
176+
"children": root_children,
177+
"stats": self.stats,
178+
"strings": self._string_table.get_strings()
179+
}
149180

150181
def process_frames(self, frames):
151182
# Reverse to root->leaf
Lib/profiling/sampling/string_table.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""String table implementation for memory-efficient string storage in profiling data."""
2+
3+
class StringTable:
    """A string table for interning strings and reducing memory usage.

    Each distinct string is stored once and identified by its position in
    the table, so repeated strings can be represented by a small integer.
    """

    def __init__(self):
        # Strings in insertion order; a string's position is its index.
        self._strings = []
        # Reverse mapping from string to its index in ``_strings``.
        self._string_to_index = {}

    def intern(self, string):
        """Intern a string and return its index.

        Args:
            string: The string to intern. Non-``str`` values are converted
                with ``str()`` first.

        Returns:
            int: The index of the string in the table
        """
        if not isinstance(string, str):
            string = str(string)

        existing = self._string_to_index.get(string)
        if existing is not None:
            return existing

        index = len(self._strings)
        self._strings.append(string)
        self._string_to_index[string] = index
        return index

    def get_string(self, index):
        """Get a string by its index.

        Args:
            index: The index of the string

        Returns:
            str: The string at the given index, or empty string if invalid
            (out of range, or not an integer)
        """
        # Reject non-integers up front: comparing None/str with an int, or
        # indexing a list with a float, would raise TypeError instead of
        # producing the documented empty-string result.
        if isinstance(index, int) and 0 <= index < len(self._strings):
            return self._strings[index]
        return ""

    def get_strings(self):
        """Get the list of all strings in the table.

        Returns:
            list: A copy of the strings list
        """
        return self._strings.copy()

    def __len__(self):
        """Return the number of strings in the table."""
        return len(self._strings)

Lib/test/test_profiling/test_sampling_profiler.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,11 @@ def test_flamegraph_collector_basic(self):
437437

438438
# Empty collector should produce 'No Data'
439439
data = collector._convert_to_flamegraph_format()
440-
self.assertIn(data["name"], ("No Data", "No significant data"))
440+
# With string table, name is now an index - resolve it using the strings array
441+
strings = data.get("strings", [])
442+
name_index = data.get("name", 0)
443+
resolved_name = strings[name_index] if isinstance(name_index, int) and 0 <= name_index < len(strings) else str(name_index)
444+
self.assertIn(resolved_name, ("No Data", "No significant data"))
441445

442446
# Test collecting sample data
443447
test_frames = [
@@ -451,14 +455,18 @@ def test_flamegraph_collector_basic(self):
451455
# Convert and verify structure: func2 -> func1 with counts = 1
452456
data = collector._convert_to_flamegraph_format()
453457
# Expect promotion: root is the single child (func2), with func1 as its only child
454-
name = data.get("name", "")
458+
strings = data.get("strings", [])
459+
name_index = data.get("name", 0)
460+
name = strings[name_index] if isinstance(name_index, int) and 0 <= name_index < len(strings) else str(name_index)
455461
self.assertIsInstance(name, str)
456462
self.assertTrue(name.startswith("Program Root: "))
457463
self.assertIn("func2 (file.py:20)", name) # formatted name
458464
children = data.get("children", [])
459465
self.assertEqual(len(children), 1)
460466
child = children[0]
461-
self.assertIn("func1 (file.py:10)", child["name"]) # formatted name
467+
child_name_index = child.get("name", 0)
468+
child_name = strings[child_name_index] if isinstance(child_name_index, int) and 0 <= child_name_index < len(strings) else str(child_name_index)
469+
self.assertIn("func1 (file.py:10)", child_name) # formatted name
462470
self.assertEqual(child["value"], 1)
463471

464472
def test_flamegraph_collector_export(self):

0 commit comments

Comments
 (0)