11import json
2+ import multiprocessing
3+ import os
24import shutil
35import subprocess
46import tempfile
57import uuid
68
79from dataclasses import dataclass
8- from typing import Dict , List
10+ from typing import Dict , List , Tuple
911
1012from launchpad .utils .logging import get_logger
1113
@@ -29,7 +31,11 @@ class CwlDemangleResult:
2931class CwlDemangler :
3032 """A class to demangle Swift symbol names using the cwl-demangle tool."""
3133
32- def __init__ (self , is_type : bool = False , continue_on_error : bool = True ):
34+ def __init__ (
35+ self ,
36+ is_type : bool = False ,
37+ continue_on_error : bool = True ,
38+ ):
3339 """
3440 Initialize the CwlDemangler.
3541
@@ -40,7 +46,11 @@ def __init__(self, is_type: bool = False, continue_on_error: bool = True):
4046 self .is_type = is_type
4147 self .queue : List [str ] = []
4248 self .continue_on_error = continue_on_error
43- self .uuid = uuid .uuid4 ()
49+ self .uuid = str (uuid .uuid4 ())
50+
51+ # Disable parallel processing if LAUNCHPAD_NO_PARALLEL_DEMANGLE=true
52+ env_disable = os .environ .get ("LAUNCHPAD_NO_PARALLEL_DEMANGLE" , "" ).lower () == "true"
53+ self .use_parallel = not env_disable
4454
4555 def add_name (self , name : str ) -> None :
4656 """
@@ -63,73 +73,131 @@ def demangle_all(self) -> Dict[str, CwlDemangleResult]:
6373
6474 names = self .queue .copy ()
6575 self .queue .clear ()
66- results : Dict [str , CwlDemangleResult ] = {}
6776
6877 # Process in chunks to avoid potential issues with large inputs
69- chunk_size = 500
78+ chunk_size = 5000
79+ total_chunks = (len (names ) + chunk_size - 1 ) // chunk_size
7080
81+ chunks : List [Tuple [List [str ], int ]] = []
7182 for i in range (0 , len (names ), chunk_size ):
7283 chunk = names [i : i + chunk_size ]
73- chunk_results = self ._demangle_chunk (chunk , i )
84+ chunk_idx = i // chunk_size
85+ chunks .append ((chunk , chunk_idx ))
86+
87+ # Only use parallel processing if workload justifies multiprocessing overhead (≥4 chunks, i.e. more than 15K symbols)
88+ do_in_parallel = self .use_parallel and total_chunks >= 4
89+
90+ logger .debug (
91+ f"Starting Swift demangling: { len (names )} symbols in { total_chunks } chunks "
92+ f"of { chunk_size } ({ 'parallel' if do_in_parallel else 'sequential' } mode)"
93+ )
94+
95+ return self ._demangle_parallel (chunks ) if do_in_parallel else self ._demangle_sequential (chunks )
96+
def _demangle_parallel(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
    """Demangle the chunks in a pool of worker processes and merge the results.

    Args:
        chunks: ``(names, chunk_index)`` pairs to demangle.

    Returns:
        The per-chunk result dicts merged into one dict, in chunk order.
        If anything in the parallel path raises, the failure is logged and
        the result of ``_demangle_sequential(chunks)`` is returned instead.
    """
    try:
        # starmap pickles the worker function and every argument tuple to
        # ship them to the worker processes, so only plain values are
        # passed: the list of names, the chunk index, the two bool flags
        # and the uuid string -- never ``self``.
        job_args = [
            (names, index, self.is_type, self.continue_on_error, self.uuid)
            for names, index in chunks
        ]

        with multiprocessing.Pool(processes=4) as pool:
            per_chunk = pool.starmap(_demangle_chunk_worker, job_args)

        merged: Dict[str, CwlDemangleResult] = {}
        for partial in per_chunk:
            merged.update(partial)
        return merged
    except Exception:
        logger.exception("Parallel demangling failed, falling back to sequential")
        return self._demangle_sequential(chunks)
126+
def _demangle_sequential(self, chunks: List[Tuple[List[str], int]]) -> Dict[str, CwlDemangleResult]:
    """Demangle the chunks one after another in the current process.

    Args:
        chunks: ``(names, chunk_index)`` pairs to demangle.

    Returns:
        The per-chunk result dicts merged into one dict, in chunk order.
    """
    merged: Dict[str, CwlDemangleResult] = {}
    for names, index in chunks:
        merged.update(self._demangle_chunk(names, index))
    return merged
77136
def _demangle_chunk(self, names: List[str], i: int) -> Dict[str, CwlDemangleResult]:
    """Demangle a single chunk using this instance's settings.

    Delegates to the module-level ``_demangle_chunk_worker``, passing the
    instance's ``is_type``, ``continue_on_error`` and ``uuid`` values.

    Args:
        names: Mangled names in this chunk.
        i: Index of the chunk.
    """
    return _demangle_chunk_worker(
        chunk=names,
        chunk_idx=i,
        is_type=self.is_type,
        continue_on_error=self.continue_on_error,
        demangle_uuid=self.uuid,
    )
139+
140+
def _demangle_chunk_worker(
    chunk: List[str],
    chunk_idx: int,
    is_type: bool,
    continue_on_error: bool,
    demangle_uuid: str,
) -> Dict[str, CwlDemangleResult]:
    """Demangle one chunk of symbols by running ``cwl-demangle batch``.

    This is a module-level function so that it can be handed to
    ``multiprocessing``; all arguments must be picklable.

    Args:
        chunk: Mangled names to demangle.
        chunk_idx: Index of this chunk; used in the temp-file name and in
            log messages.
        is_type: When true, ``--isType`` is passed to the tool.
        continue_on_error: When true, ``--continue-on-error`` is passed to
            the tool.
        demangle_uuid: Identifier embedded in the temp-file name.

    Returns:
        A dict mapping each mangled name reported by the tool (and present
        in ``chunk``) to its ``CwlDemangleResult``. An empty dict is
        returned when ``chunk`` is empty, when the binary is not on PATH,
        when the tool exits with a non-zero status, or when its output is
        not valid JSON.
    """
    if not chunk:
        return {}

    binary_path = shutil.which("cwl-demangle")
    if binary_path is None:
        logger.error("cwl-demangle binary not found in PATH")
        return {}

    with tempfile.NamedTemporaryFile(
        mode="w", prefix=f"cwl-demangle-{demangle_uuid}-chunk-{chunk_idx}-", suffix=".txt"
    ) as temp_file:
        temp_file.write("\n".join(chunk))
        # Flush so the subprocess sees the full input when it opens the file by name.
        temp_file.flush()

        command_parts = [
            binary_path,
            "batch",
            "--input",
            temp_file.name,
            "--json",
        ]

        if is_type:
            command_parts.append("--isType")

        if continue_on_error:
            command_parts.append("--continue-on-error")

        try:
            result = subprocess.run(command_parts, capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError:
            logger.exception(f"cwl-demangle failed for chunk {chunk_idx}")
            return {}

    # The temp file is no longer needed once the tool has finished, so the
    # output is parsed after the ``with`` block has released it.
    try:
        batch_result = json.loads(result.stdout)
    except json.JSONDecodeError:
        # Treat unparsable output like a tool failure instead of letting the
        # exception escape the worker.
        logger.exception(f"cwl-demangle returned invalid JSON for chunk {chunk_idx}")
        return {}

    # Set lookup keeps the membership test O(1) per reported symbol.
    chunk_set = set(chunk)
    results: Dict[str, CwlDemangleResult] = {}

    for symbol_result in batch_result.get("results", []):
        mangled = symbol_result.get("mangled", "")
        if mangled in chunk_set:
            results[mangled] = CwlDemangleResult(
                name=symbol_result["name"],
                type=symbol_result["type"],
                identifier=symbol_result["identifier"],
                module=symbol_result["module"],
                testName=symbol_result["testName"],
                typeName=symbol_result["typeName"],
                description=symbol_result["description"],
                mangled=mangled,
            )

    return results
0 commit comments