Skip to content

Commit 7551f63

Browse files
[iOS] Improve demangling performance (#483)
1 parent e7f13f1 commit 7551f63

File tree

2 files changed

+164
-81
lines changed

2 files changed

+164
-81
lines changed
Lines changed: 128 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import json
2+
import multiprocessing
3+
import os
24
import shutil
35
import subprocess
46
import tempfile
57
import uuid
68

79
from dataclasses import dataclass
8-
from typing import Dict, List
10+
from typing import Dict, List, Tuple
911

1012
from launchpad.utils.logging import get_logger
1113

@@ -29,7 +31,11 @@ class CwlDemangleResult:
2931
class CwlDemangler:
3032
"""A class to demangle Swift symbol names using the cwl-demangle tool."""
3133

32-
def __init__(self, is_type: bool = False, continue_on_error: bool = True):
34+
def __init__(
35+
self,
36+
is_type: bool = False,
37+
continue_on_error: bool = True,
38+
):
3339
"""
3440
Initialize the CwlDemangler.
3541
@@ -40,7 +46,11 @@ def __init__(self, is_type: bool = False, continue_on_error: bool = True):
4046
self.is_type = is_type
4147
self.queue: List[str] = []
4248
self.continue_on_error = continue_on_error
43-
self.uuid = uuid.uuid4()
49+
self.uuid = str(uuid.uuid4())
50+
51+
# Disable parallel processing if LAUNCHPAD_NO_PARALLEL_DEMANGLE=true
52+
env_disable = os.environ.get("LAUNCHPAD_NO_PARALLEL_DEMANGLE", "").lower() == "true"
53+
self.use_parallel = not env_disable
4454

4555
def add_name(self, name: str) -> None:
4656
"""
@@ -63,73 +73,131 @@ def demangle_all(self) -> Dict[str, CwlDemangleResult]:
6373

6474
names = self.queue.copy()
6575
self.queue.clear()
66-
results: Dict[str, CwlDemangleResult] = {}
6776

6877
# Process in chunks to avoid potential issues with large inputs
69-
chunk_size = 500
78+
chunk_size = 5000
79+
total_chunks = (len(names) + chunk_size - 1) // chunk_size
7080

81+
chunks: List[Tuple[List[str], int]] = []
7182
for i in range(0, len(names), chunk_size):
7283
chunk = names[i : i + chunk_size]
73-
chunk_results = self._demangle_chunk(chunk, i)
84+
chunk_idx = i // chunk_size
85+
chunks.append((chunk, chunk_idx))
86+
87+
# Only use parallel processing if workload justifies multiprocessing overhead (≥4 chunks = ≥20K symbols)
88+
do_in_parallel = self.use_parallel and total_chunks >= 4
89+
90+
logger.debug(
91+
f"Starting Swift demangling: {len(names)} symbols in {total_chunks} chunks "
92+
f"of {chunk_size} ({'parallel' if do_in_parallel else 'sequential'} mode)"
93+
)
94+
95+
return self._demangle_parallel(chunks) if do_in_parallel else self._demangle_sequential(chunks)
96+
97+
def _demangle_parallel(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
    """Fan the chunks out to a pool of four worker processes and merge the results.

    Args:
        chunks: ``(names, chunk_index)`` pairs, one per chunk to demangle.

    Returns:
        A single mapping of mangled name to its demangle result, merged in
        chunk order. If anything in the parallel path raises, the failure is
        logged and the result of ``_demangle_sequential(chunks)`` is returned
        instead.
    """
    try:
        # starmap pickles the worker function and every argument tuple, so only
        # plain picklable values are passed: the list of names, the chunk
        # index, two bools and the uuid string.
        shared = (self.is_type, self.continue_on_error, self.uuid)
        tasks = [(names, index, *shared) for names, index in chunks]

        with multiprocessing.Pool(processes=4) as pool:
            per_chunk = pool.starmap(_demangle_chunk_worker, tasks)

        merged: Dict[str, CwlDemangleResult] = {}
        for partial in per_chunk:
            merged.update(partial)
        return merged
    except Exception:
        logger.exception("Parallel demangling failed, falling back to sequential")
        return self._demangle_sequential(chunks)
126+
127+
def _demangle_sequential(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
    """Demangle the chunks one after another in the calling process.

    Args:
        chunks: ``(names, chunk_index)`` pairs, one per chunk to demangle.

    Returns:
        A single mapping of mangled name to its demangle result, merged in
        chunk order.
    """
    merged: Dict[str, CwlDemangleResult] = {}
    for names, index in chunks:
        merged.update(self._demangle_chunk(names, index))
    return merged
77136

78137
def _demangle_chunk(self, names: List[str], i: int) -> Dict[str, CwlDemangleResult]:
    """Demangle one chunk using this instance's settings.

    Thin wrapper that forwards to the module-level ``_demangle_chunk_worker``.

    Args:
        names: Mangled names in this chunk.
        i: Index of the chunk, forwarded as ``chunk_idx``.

    Returns:
        Whatever ``_demangle_chunk_worker`` returns for this chunk.
    """
    return _demangle_chunk_worker(
        chunk=names,
        chunk_idx=i,
        is_type=self.is_type,
        continue_on_error=self.continue_on_error,
        demangle_uuid=self.uuid,
    )
139+
140+
141+
def _demangle_chunk_worker(
    chunk: List[str],
    chunk_idx: int,
    is_type: bool,
    continue_on_error: bool,
    demangle_uuid: str,
) -> Dict[str, CwlDemangleResult]:
    """Demangle one chunk of symbols by running ``cwl-demangle batch``.

    This is a module-level function taking only plain values so that it can be
    pickled and dispatched to ``multiprocessing`` workers.

    Args:
        chunk: Mangled names to demangle.
        chunk_idx: Index of this chunk; used in the temp-file name and in log
            messages.
        is_type: When true, ``--isType`` is passed to the tool.
        continue_on_error: When true, ``--continue-on-error`` is passed to the
            tool.
        demangle_uuid: Identifier embedded in the temp-file name.

    Returns:
        Mapping of mangled name to ``CwlDemangleResult`` for every reported
        result whose ``mangled`` value is one of the names in ``chunk``.
        An empty dict is returned when ``chunk`` is empty, the binary is not on
        ``PATH``, the tool exits non-zero, or its output is not valid JSON.

    Raises:
        KeyError: If a matching result entry lacks one of the expected fields.
    """
    if not chunk:
        return {}

    binary_path = shutil.which("cwl-demangle")
    if binary_path is None:
        logger.error("cwl-demangle binary not found in PATH")
        return {}

    # The tool reads its input from a file, one mangled name per line. The
    # temp file is removed automatically when the ``with`` block exits.
    with tempfile.NamedTemporaryFile(
        mode="w", prefix=f"cwl-demangle-{demangle_uuid}-chunk-{chunk_idx}-", suffix=".txt"
    ) as temp_file:
        temp_file.write("\n".join(chunk))
        # Flush so the child process sees the full contents on disk.
        temp_file.flush()

        command_parts = [
            binary_path,
            "batch",
            "--input",
            temp_file.name,
            "--json",
        ]

        if is_type:
            command_parts.append("--isType")

        if continue_on_error:
            command_parts.append("--continue-on-error")

        try:
            completed = subprocess.run(command_parts, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError:
            logger.exception(f"cwl-demangle failed for chunk {chunk_idx}")
            return {}

    # Treat unparsable output like a failed run: log it and return nothing,
    # rather than letting JSONDecodeError escape.
    try:
        batch_result = json.loads(completed.stdout)
    except json.JSONDecodeError:
        logger.exception(f"cwl-demangle returned invalid JSON for chunk {chunk_idx}")
        return {}

    # Set membership keeps the per-result filter O(1).
    chunk_set = set(chunk)
    results: Dict[str, CwlDemangleResult] = {}

    for symbol_result in batch_result.get("results", []):
        mangled = symbol_result.get("mangled", "")
        if mangled not in chunk_set:
            continue
        results[mangled] = CwlDemangleResult(
            name=symbol_result["name"],
            type=symbol_result["type"],
            identifier=symbol_result["identifier"],
            module=symbol_result["module"],
            testName=symbol_result["testName"],
            typeName=symbol_result["typeName"],
            description=symbol_result["description"],
            mangled=mangled,
        )

    return results

tests/integration/test_cwl_demangle.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import os
2+
3+
from unittest import mock
4+
15
from launchpad.utils.apple.cwl_demangle import CwlDemangler, CwlDemangleResult
26

37

@@ -65,36 +69,47 @@ def test_demangle_all_success(self):
6569
== "_$s6Sentry0A18UserFeedbackWidgetC18RootViewControllerC6config6buttonAeA0abC13ConfigurationC_AA0abcd6ButtonF0Ctcfc"
6670
)
6771

68-
def test_demangle_all_chunked_processing(self):
69-
"""Test that chunked processing works with many names."""
72+
def test_parallel_processing(self):
    """Test demangling with 20k symbols (covers chunking and parallel mode)."""
    demangler = CwlDemangler(continue_on_error=True)
    # Force parallel mode so the test does not silently run sequentially when
    # LAUNCHPAD_NO_PARALLEL_DEMANGLE=true is set in the ambient environment.
    demangler.use_parallel = True

    # 20k symbols = 4 chunks at 5k each, the minimum that selects parallel mode.
    symbols_needed = 20000
    symbols = self._generate_symbols(symbols_needed)
    for symbol in symbols:
        demangler.add_name(symbol)

    result = demangler.demangle_all()

    assert len(result) == symbols_needed
    # Spot check every 1000th symbol rather than all 20k.
    for symbol in symbols[::1000]:
        assert symbol in result
        assert isinstance(result[symbol], CwlDemangleResult)
        # Each result must be keyed by the mangled name it reports.
        assert result[symbol].mangled == symbol
89+
90+
def test_environment_variable_disables_parallel(self):
    """Test LAUNCHPAD_NO_PARALLEL_DEMANGLE env var disables parallel."""
    var_name = "LAUNCHPAD_NO_PARALLEL_DEMANGLE"

    # Unset: parallel stays enabled. patch.dict restores os.environ on exit,
    # so popping the variable inside the block does not leak out of the test.
    with mock.patch.dict(os.environ, {}, clear=False):
        os.environ.pop(var_name, None)
        assert CwlDemangler().use_parallel is True

    # The constructor lowercases the value and compares it to "true", so any
    # casing of "true" disables parallel mode and any other value leaves it on.
    expectations = [
        ("true", False),
        ("TRUE", False),
        ("True", False),
        ("false", True),
        ("", True),
    ]
    for value, expected in expectations:
        with mock.patch.dict(os.environ, {var_name: value}):
            assert CwlDemangler().use_parallel is expected
102+
103+
def _generate_symbols(self, count: int) -> list[str]:
104+
"""Generate valid Swift mangled symbols."""
105+
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
106+
symbols = []
107+
for i in range(count):
89108
letter1 = letters[i % len(letters)]
90109
letter2 = letters[(i // len(letters)) % len(letters)]
91110
letter3 = letters[(i // (len(letters) * len(letters))) % len(letters)]
92-
93111
module_name = f"Test{letter1}{letter2}"
94112
symbol_name = f"Symbol{letter3}{i % 100}"
95113
mangled_name = f"_$s{len(module_name)}{module_name}{len(symbol_name)}{symbol_name}"
96-
97-
assert mangled_name in result
98-
# Check that each result is a CwlDemangleResult instance
99-
assert isinstance(result[mangled_name], CwlDemangleResult)
100-
assert result[mangled_name].mangled == mangled_name
114+
symbols.append(mangled_name)
115+
return symbols

0 commit comments

Comments
 (0)