#!/usr/bin/env python3
"""
Analysis script for stress test results.

This script processes the stress_test_adaptive.matrix directory and generates:
1. vector.tsv - Contains average error and density values for each configuration
2. metadata.tsv - Contains sparse attention configuration settings

Usage:
    python benchmark/scripts/analyse_stress_tests.py
"""

import argparse
import json
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Tuple


def parse_config_name(config_name: str) -> Dict[str, Any]:
22+ """Parse configuration name to extract parameters.
23+
24+ Args:
25+ config_name: Configuration name like "adaptive_sampling.sink_0.001_window_0.001_heavy_0.005_base_0.01_epsilon_0.01_delta_0.01"
26+ or "oracle_top_k_0.5.sink_0.02_window_0.02"
27+ or "oracle_top_p_0.9999.sink_0.001_window_0.001"
28+
29+ Returns:
30+ Dictionary with parsed parameters
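
    Example (illustrative doctest; the expected dict follows the
    oracle_top_k branch implemented below):
        >>> parse_config_name("oracle_top_k_0.5.sink_0.02_window_0.02")
        {'config_type': 'oracle_top_k', 'top_k': 0.5, 'sink_size': 0.02, 'window_size': 0.02}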
31+ """
32+ # Extract parameters using regex for different configuration types
33+
34+ # Pattern for adaptive_sampling
35+ adaptive_pattern = r"adaptive_sampling\.sink_([\d.]+)_window_([\d.]+)_heavy_([\d.]+)_base_([\d.]+)_epsilon_([\d.]+)_delta_([\d.]+)"
36+ adaptive_match = re .match (adaptive_pattern , config_name )
37+
38+ if adaptive_match :
39+ return {
40+ "config_type" : "adaptive_sampling" ,
41+ "sink_size" : float (adaptive_match .group (1 )),
42+ "window_size" : float (adaptive_match .group (2 )),
43+ "heavy_size" : float (adaptive_match .group (3 )),
44+ "base_rate_sampling" : float (adaptive_match .group (4 )),
45+ "epsilon" : float (adaptive_match .group (5 )),
46+ "delta" : float (adaptive_match .group (6 ))
47+ }
48+
49+ # Pattern for oracle_top_k
50+ top_k_pattern = r"oracle_top_k_([\d.]+)\.sink_([\d.]+)_window_([\d.]+)"
51+ top_k_match = re .match (top_k_pattern , config_name )
52+
53+ if top_k_match :
54+ return {
55+ "config_type" : "oracle_top_k" ,
56+ "top_k" : float (top_k_match .group (1 )),
57+ "sink_size" : float (top_k_match .group (2 )),
58+ "window_size" : float (top_k_match .group (3 ))
59+ }
60+
61+ # Pattern for oracle_top_p
62+ top_p_pattern = r"oracle_top_p_([\d.]+)\.sink_([\d.]+)_window_([\d.]+)"
63+ top_p_match = re .match (top_p_pattern , config_name )
64+
65+ if top_p_match :
66+ return {
67+ "config_type" : "oracle_top_p" ,
68+ "top_p" : float (top_p_match .group (1 )),
69+ "sink_size" : float (top_p_match .group (2 )),
70+ "window_size" : float (top_p_match .group (3 ))
71+ }
72+
73+ # If no pattern matches, return empty dict
74+ return {"config_type" : "unknown" }
75+
76+
77+ def load_config_file (config_path : Path ) -> Dict [str , Any ]:
78+ """Load configuration from JSON file.
79+
80+ Args:
81+ config_path: Path to config.json file
82+
83+ Returns:
84+ Configuration dictionary
85+ """
86+ with open (config_path , 'r' ) as f :
87+ return json .load (f )
88+
89+
90+ def load_micro_metrics (metrics_path : Path ) -> List [Dict [str , Any ]]:
91+ """Load micro metrics from JSONL file.
92+
93+ Args:
94+ metrics_path: Path to micro_metrics.jsonl file
95+
96+ Returns:
97+ List of metric entries
98+ """
99+ metrics = []
100+ with open (metrics_path , 'r' ) as f :
101+ for line in f :
102+ if line .strip ():
103+ metrics .append (json .loads (line ))
104+ return metrics
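
# Note: downstream processing (organize_metrics_by_layer) reads the keys
# "metric", "value", and "metadata"/"layer_idx", which suggests each JSONL
# line looks roughly like the following; the exact schema is an assumption
# inferred from this script, not a documented format:
#   {"metric": "research_attention_density", "value": 0.12,
#    "metadata": {"layer_idx": 0}}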


def process_experiment_directory(exp_dir: Path) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    """Process a single experiment directory.

    Args:
        exp_dir: Path to experiment directory

    Returns:
        Tuple of (metrics_data, config_data)
    """
    # Find the benchmark subdirectory (e.g., longbench_passage_retrieval_en)
    benchmark_dirs = [d for d in exp_dir.iterdir() if d.is_dir()]
    if not benchmark_dirs:
        return [], {}

    benchmark_dir = benchmark_dirs[0]  # Take the first benchmark directory

    # Load configuration
    config_path = benchmark_dir / "config.json"
    if not config_path.exists():
        return [], {}

    config = load_config_file(config_path)

    # Load micro metrics
    metrics_path = benchmark_dir / "micro_metrics.jsonl"
    if not metrics_path.exists():
        return [], {}

    metrics = load_micro_metrics(metrics_path)

    return metrics, config


def extract_sparse_config_params(config: Dict[str, Any]) -> Dict[str, Any]:
    """Extract sparse attention configuration parameters.

    Args:
        config: Configuration dictionary

    Returns:
        Dictionary with sparse attention parameters
    """
    sparse_config = config.get("sparse_attention_config", {})
    masker_configs = sparse_config.get("masker_configs", [])

    params = {}

    # Extract parameters from masker configs
    for masker_config in masker_configs:
        if "sink_size" in masker_config:
            params["sink_size"] = masker_config["sink_size"]
        elif "window_size" in masker_config:
            params["window_size"] = masker_config["window_size"]
        elif "heavy_size" in masker_config:
            params["heavy_size"] = masker_config["heavy_size"]
        elif "base_rate_sampling" in masker_config:
            params["base_rate_sampling"] = masker_config["base_rate_sampling"]
            params["epsilon"] = masker_config.get("epsilon")
            params["delta"] = masker_config.get("delta")
            params["init_offset"] = masker_config.get("init_offset")
            params["local_offset"] = masker_config.get("local_offset")
        elif "top_k" in masker_config:
            params["top_k"] = masker_config["top_k"]
        elif "top_p" in masker_config:
            params["top_p"] = masker_config["top_p"]

    return params
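
# The masker_configs handling above implies a config.json of roughly this
# shape; the sketch is inferred from the keys this script reads, not from a
# documented schema:
#   {"sparse_attention_config": {"masker_configs": [
#       {"sink_size": 0.001}, {"window_size": 0.001},
#       {"heavy_size": 0.005},
#       {"base_rate_sampling": 0.01, "epsilon": 0.01, "delta": 0.01,
#        "init_offset": null, "local_offset": null}]}}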


def organize_metrics_by_layer(metrics: List[Dict[str, Any]]) -> Dict[int, Dict[str, float]]:
    """Organize metrics by layer index and average repeated measurements.

    Args:
        metrics: List of metric entries

    Returns:
        Dictionary mapping layer_idx to averaged metrics
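
    Example (doctest averaging two density measurements for layer 0):
        >>> organize_metrics_by_layer([
        ...     {"metric": "research_attention_density", "value": 1.0,
        ...      "metadata": {"layer_idx": 0}},
        ...     {"metric": "research_attention_density", "value": 3.0,
        ...      "metadata": {"layer_idx": 0}},
        ... ])
        {0: {'density': 2.0}}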
184+ """
185+ layer_metrics = {}
186+
187+ # First pass: collect all values for each layer
188+ for metric in metrics :
189+ layer_idx = metric .get ("metadata" , {}).get ("layer_idx" )
190+ if layer_idx is None :
191+ continue
192+
193+ if layer_idx not in layer_metrics :
194+ layer_metrics [layer_idx ] = {"density" : [], "error" : []}
195+
196+ metric_name = metric .get ("metric" )
197+ value = metric .get ("value" )
198+
199+ if metric_name == "research_attention_density" :
200+ layer_metrics [layer_idx ]["density" ].append (value )
201+ elif metric_name == "research_attention_output_error" :
202+ layer_metrics [layer_idx ]["error" ].append (value )
203+
204+ # Second pass: average the collected values
205+ averaged_metrics = {}
206+ for layer_idx , values in layer_metrics .items ():
207+ averaged_metrics [layer_idx ] = {}
208+
209+ if values ["density" ]:
210+ averaged_metrics [layer_idx ]["density" ] = sum (values ["density" ]) / len (values ["density" ])
211+
212+ if values ["error" ]:
213+ averaged_metrics [layer_idx ]["error" ] = sum (values ["error" ]) / len (values ["error" ])
214+
215+ return averaged_metrics


def analyze_stress_tests(results_dir: str, output_dir: str) -> None:
    """Analyze stress test results and generate TSV files.

    Args:
        results_dir: Path to the stress_test_adaptive.matrix directory
        output_dir: Output directory for the TSV files
    """
    results_path = Path(results_dir)
    output_path = Path(output_dir)
    # parents=True so nested output paths are created as needed
    output_path.mkdir(parents=True, exist_ok=True)

    # Find model directories
    model_dirs = [d for d in results_path.iterdir() if d.is_dir()]

    all_vector_data = []
    all_metadata = []

    for model_dir in model_dirs:
        model_name = model_dir.name

        # Find configuration directories
        config_dirs = [d for d in model_dir.iterdir() if d.is_dir()]

        for config_dir in config_dirs:
            config_name = config_dir.name

            # Parse configuration name
            parsed_params = parse_config_name(config_name)

            # Process experiment directory
            metrics, config = process_experiment_directory(config_dir)

            if not metrics or not config:
                continue

            # Extract sparse attention parameters
            sparse_params = extract_sparse_config_params(config)

            # Organize metrics by layer
            layer_metrics = organize_metrics_by_layer(metrics)

            # Generate vector data
            for layer_idx, layer_data in layer_metrics.items():
                if "density" in layer_data and "error" in layer_data:
                    vector_entry = {
                        "model": model_name,
                        "config": config_name,
                        "layer_idx": layer_idx,
                        "density": layer_data["density"],
                        "error": layer_data["error"],
                    }
                    all_vector_data.append(vector_entry)

            # Generate the base metadata entry for this configuration
            metadata_entry = {
                "model": model_name,
                "config": config_name,
                "layer_idx": "all",  # Overwritten per layer below
                **parsed_params,
                **sparse_params,
            }

            # Add one metadata row per layer
            for layer_idx in layer_metrics.keys():
                layer_metadata = metadata_entry.copy()
                layer_metadata["layer_idx"] = layer_idx
                all_metadata.append(layer_metadata)

    # Output paths
    vector_path = output_path / "vector.tsv"
    metadata_path = output_path / "metadata.tsv"

    # Write vector data (two tab-separated columns: density, error)
    with open(vector_path, "w") as f:
        f.write("density\terror\n")
        for entry in all_vector_data:
            f.write(f"{entry['density']}\t{entry['error']}\n")
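
    # vector.tsv therefore looks like this (values illustrative, not real
    # measurements):
    #   density error
    #   0.12    0.034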

    # Write metadata
    with open(metadata_path, "w") as f:
        if all_metadata:
            # Collect every key used by any metadata entry
            all_keys = set()
            for entry in all_metadata:
                all_keys.update(entry.keys())

            # Sort keys for consistent column ordering
            sorted_keys = sorted(all_keys)

            # Write header
            f.write("\t".join(sorted_keys) + "\n")

            # Write one row per entry; missing keys become empty cells
            for entry in all_metadata:
                row = [str(entry.get(key, "")) for key in sorted_keys]
                f.write("\t".join(row) + "\n")

    print("Analysis complete!")
    print(f"Vector data written to: {vector_path}")
    print(f"Metadata written to: {metadata_path}")
    print(f"Total vector entries: {len(all_vector_data)}")
    print(f"Total metadata entries: {len(all_metadata)}")


def main():
    """Main function."""
    parser = argparse.ArgumentParser(description="Analyze stress test results")
    parser.add_argument(
        "--results-dir",
        default="./stress_test_adaptive.matrix",
        help="Path to stress test results directory",
    )
    parser.add_argument(
        "--output-dir",
        default="./analysis_output",
        help="Output directory for TSV files",
    )

    args = parser.parse_args()

    # Check that the results directory exists before analysing
    if not os.path.exists(args.results_dir):
        print(f"Error: Results directory '{args.results_dir}' does not exist")
        return

    analyze_stress_tests(args.results_dir, args.output_dir)


if __name__ == "__main__":
    main()
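
# Example invocation (the paths shown are the argparse defaults above):
#   python benchmark/scripts/analyse_stress_tests.py \
#       --results-dir ./stress_test_adaptive.matrix --output-dir ./analysis_output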