@@ -33,6 +33,11 @@ def parse_args() -> argparse.Namespace:
3333 help = 'Aggregated JSON report of test results generated by aggregate_test_reports.py. '
3434 'Usually named test_results.json.' ,
3535 required = True )
36+ parser .add_argument (
37+ '--planned-tests' ,
38+ help = 'Spark planned test list produced by run_tests_on_spark.py. Usually named '
39+ 'planned_tests.json.' ,
40+ required = True )
3641 parser .add_argument (
3742 '--run-tests-on-spark-report' ,
3843 help = 'Full build report produced by run_tests_on_spark.py. Usually named '
@@ -51,6 +56,9 @@ def parse_args() -> argparse.Namespace:
5156 parser .add_argument (
5257 '--test-list-out-path' ,
5358 help = 'Write the list of test descriptors from both types of reports to this file.' )
59+ parser .add_argument (
60+ '--analysis-out-path' ,
61+ help = 'Write the analysis to this file as well as stdout.' )
5462 parser .add_argument (
5563 '--verbose' ,
5664 action = 'store_true' ,
@@ -66,8 +74,10 @@ def parse_args() -> argparse.Namespace:
6674
6775@dataclass
6876class AnalysisResult :
77+ num_tests_planned : int = 0
6978 num_tests_in_junit_xml : int = 0
7079 num_tests_in_spark : int = 0
80+ num_tests_did_not_run : int = 0
7181
7282 num_failed_tests_in_junit_xml : int = 0
7383 num_failed_tests_in_spark : int = 0
@@ -86,7 +96,7 @@ class AnalysisResult:
8696 num_tests_without_spark_report : int = 0
8797
8898 # Total number of unique test descriptors found across any types of reports (union).
89- num_total_unique_tests : int = 0
99+ num_unique_test_results : int = 0
90100
91101 # Total number of unique test descriptors found across both types of reports (intersection).
92102 num_unique_tests_present_in_both_report_types : int = 0
@@ -108,6 +118,7 @@ class SingleBuildAnalyzer:
108118
109119 def __init__ (self ,
110120 aggregated_test_results_path : str ,
121+ planned_tests_path : str ,
111122 run_tests_on_spark_report_path : str ,
112123 archive_dir : Optional [str ],
113124 successful_tests_out_path : Optional [str ] = None ,
@@ -117,16 +128,20 @@ def __init__(self,
117128 results. Look at aggregate_test_reports.py for the format of this dictionary, and at
118129 python/yugabyte/test_data/clang16_debug_centos7_test_report_from_junit_xml.json.gz for
119130 an example.
131+ :param planned_tests_path: Path to the JSON file containing list of tests to be run.
132+ Example: python/yugabyte/test_data/planned_tests.json
120133 :param run_tests_on_spark_report_path: Path to the JSON file containing the full build
121134 report produced by run_tests_on_spark.py. As an example, look at the following file:
122135 python/yugabyte/test_data/clang16_debug_centos7_run_tests_on_spark_full_report.json.gz
123136 """
124137 logging .info ("Reading aggregated JUnit XML test results from %s" ,
125138 aggregated_test_results_path )
126- logging .info ("Reading full Spark build report from %s" , run_tests_on_spark_report_path )
127139 self .test_reports_from_junit_xml = cast (
128140 List [Dict [str , Any ]],
129141 json_util .read_json_file (aggregated_test_results_path )['tests' ])
142+ logging .info ("Reading planned Spark test list from %s" , planned_tests_path )
143+ self .planned_tests_list = json_util .read_json_file (planned_tests_path )
144+ logging .info ("Reading full Spark build report from %s" , run_tests_on_spark_report_path )
130145 self .run_tests_on_spark_report = cast (
131146 Dict [str , SparkTaskReport ],
132147 json_util .read_json_file (run_tests_on_spark_report_path )['tests' ])
@@ -234,6 +249,10 @@ def analyze(self) -> AnalysisResult:
234249
235250 desc_to_spark_task_report : Dict [SimpleTestDescriptor , SparkTaskReport ] = {}
236251
252+ # TODO: This script does not support multiple test repetitions. Test descriptors are
253+ # assumed to be SimpleTestDescriptors with no :::attempt_X suffixes, and
254+ # assertions ensure that the name is not duplicated in the input report.
255+ # Even if it is not supported, it should gracefully ignore such tests.
237256 failed_tests_in_spark : Set [SimpleTestDescriptor ] = set ()
238257 for test_desc_str , spark_test_report in self .run_tests_on_spark_report .items ():
239258 test_desc = SimpleTestDescriptor .parse (test_desc_str )
@@ -336,19 +355,24 @@ def analyze(self) -> AnalysisResult:
336355 junit_xml_report .get ('num_failures' , 0 ) > 0 ):
337356 failed_tests_in_junit_xml .add (test_desc )
338357
339- # Compare the set of tests (both successes and failures) for two types of reports.
340- for test_desc in sorted (
341- desc_to_spark_task_report .keys () | deduped_junit_reports_dict .keys ()):
342- reports_from_junit_xml = deduped_junit_reports_dict .get (test_desc )
358+ # Compare the spark planned tests to spark & junit results.
359+ result .num_tests_planned = len (self .planned_tests_list )
360+ planned_desc_list = [SimpleTestDescriptor .parse (td_str )
361+ for td_str in self .planned_tests_list ]
362+ for test_desc in planned_desc_list :
343363 spark_task_report = desc_to_spark_task_report .get (test_desc )
344- if reports_from_junit_xml is None :
364+ reports_from_junit_xml = deduped_junit_reports_dict .get (test_desc )
365+ if spark_task_report is None and reports_from_junit_xml is None :
366+ logging .info ("Test descriptor %s has no results" , test_desc )
367+ result .num_tests_did_not_run += 1
368+ result .num_tests_without_junit_xml_report += 1
369+ result .num_tests_without_spark_report += 1
370+ elif reports_from_junit_xml is None :
345371 logging .info ("Test descriptor %s has no reports from JUnit XML files" , test_desc )
346372 result .num_tests_without_junit_xml_report += 1
347- continue
348- if spark_task_report is None :
373+ elif spark_task_report is None :
349374 logging .info ("Test descriptor %s has no report from Spark" , test_desc )
350- result .num_tests_without_spark_report = 1
351- continue
375+ result .num_tests_without_spark_report += 1
352376
353377 for test_desc in sorted (failed_tests_in_spark ):
354378 if test_desc not in failed_tests_in_junit_xml :
@@ -380,8 +404,8 @@ def analyze(self) -> AnalysisResult:
380404
381405 all_test_descs = (set (desc_to_spark_task_report .keys ()) |
382406 set (desc_to_test_reports_from_junit_xml .keys ()))
383- result .num_total_unique_tests = len (all_test_descs )
384- logging .info ("Found %d unique tests total" % result .num_total_unique_tests )
407+ result .num_unique_test_results = len (all_test_descs )
408+ logging .info ("Found %d unique tests total" % result .num_unique_test_results )
385409
386410 tests_present_in_both = (set (desc_to_spark_task_report .keys ()) &
387411 set (desc_to_test_reports_from_junit_xml .keys ()))
@@ -395,7 +419,7 @@ def analyze(self) -> AnalysisResult:
395419 test_descriptor .write_test_descriptors_to_file (
396420 self .successful_tests_out_path , successful_tests , 'successful tests' )
397421 test_descriptor .write_test_descriptors_to_file (
398- self .test_list_out_path , all_test_descs , 'all tests' )
422+ self .test_list_out_path , planned_desc_list , 'all tests' )
399423
400424 return result
401425
@@ -405,14 +429,21 @@ def main() -> None:
405429 common_util .init_logging (verbose = args .verbose )
406430 result = SingleBuildAnalyzer (
407431 args .aggregated_json_test_results ,
432+ args .planned_tests ,
408433 args .run_tests_on_spark_report ,
409434 args .archive_dir ,
410435 args .successful_tests_out_path ,
411436 args .test_list_out_path
412437 ).analyze ()
413438
439+ stats = ''
414440 for field in dataclasses .fields (result ):
415441 logging .info ("%s: %s" , field .name , getattr (result , field .name ))
442+ stats += f"{ field .name } : { getattr (result , field .name )} \n "
443+
444+ if args .analysis_out_path :
445+ logging .info ("Writing the analysis stats to %s" , args .analysis_out_path )
446+ file_util .write_file (stats , args .analysis_out_path )
416447
417448
418449if __name__ == '__main__' :
0 commit comments