Skip to content

Commit 4ad9b84

Browse files
committed
[DEVOPS-3294] DB unit test scripts: Add spark planned tests report
Summary: This is to enable visibility of tests that were not run because a spark job was killed or spark was just unsuccessful in running a test. run_tests_on_spark.py - produce list of planned tests. analyze_test_results.py - consider planned tests as the "total" instead of the union of spark and junit results. Added couple more output files of analysis that can be archived to jenkins. Some small enhancements to enable debugging these scripts on dev-server. Test Plan: spark-submit ... aggregate_test_reports.py ... analyze_test_results.py ... Mimicing how they are called in build-support/jenkins/yb-jenkins-test.sh, using test list c++, java, and non-existant tests. Reviewers: jharveysmith Reviewed By: jharveysmith Subscribers: devops Differential Revision: https://phorge.dev.yugabyte.com/D40493
1 parent e4e2afa commit 4ad9b84

File tree

6 files changed

+469
-23
lines changed

6 files changed

+469
-23
lines changed

build-support/jenkins/yb-jenkins-test.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,17 +267,21 @@ log "Aggregating test reports"
267267
log "Analyzing test results"
268268
test_results_from_junit_xml_path=${YB_SRC_ROOT}/test_results.json
269269
test_results_from_spark_path=${BUILD_ROOT}/full_build_report.json.gz
270+
planned_tests_path=${BUILD_ROOT}/planned_tests.json
270271

271272
if [[ -f $test_results_from_junit_xml_path &&
272-
-f $test_results_from_spark_path ]]; then
273+
-f $test_results_from_spark_path &&
274+
$NUM_REPETITIONS == 1 ]]; then
273275
(
274276
set -x
275277
"$YB_SCRIPT_PATH_ANALYZE_TEST_RESULTS" \
276278
"--aggregated-json-test-results=$test_results_from_junit_xml_path" \
279+
"--planned-tests=$planned_tests_path" \
277280
"--run-tests-on-spark-report=$test_results_from_spark_path" \
278281
"--archive-dir=$YB_SRC_ROOT" \
279-
"--successful-tests-out-path=$YB_SRC_ROOT/successful_tests.txt" \
280-
"--test-list-out-path=$YB_SRC_ROOT/test_list.txt"
282+
"--successful-tests-out-path=$YB_SRC_ROOT/test_successes.txt" \
283+
"--test-list-out-path=$YB_SRC_ROOT/test_list.txt" \
284+
"--analysis-out-path=$YB_SRC_ROOT/test_analysis.txt"
281285
)
282286
else
283287
if [[ ! -f $test_results_from_junit_xml_path ]]; then
@@ -286,6 +290,9 @@ else
286290
if [[ ! -f $test_results_from_spark_path ]]; then
287291
log "File $test_results_from_spark_path does not exist"
288292
fi
293+
if [[ $NUM_REPETITIONS != 1 ]]; then
294+
log "Analyze script cannot handle multiple repetitions."
295+
fi
289296
log "Not running $YB_SCRIPT_PATH_ANALYZE_TEST_RESULTS"
290297
fi
291298

python/yugabyte/analyze_test_results.py

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def parse_args() -> argparse.Namespace:
3333
help='Aggregated JSON report of test results generated by aggregate_test_reports.py. '
3434
'Usually named test_results.json.',
3535
required=True)
36+
parser.add_argument(
37+
'--planned-tests',
38+
help='Spark planned test list produced by run_tests_on_spark.py. Usually named '
39+
'planned_tests.json.',
40+
required=True)
3641
parser.add_argument(
3742
'--run-tests-on-spark-report',
3843
help='Full build report produced by run_tests_on_spark.py. Usually named '
@@ -51,6 +56,9 @@ def parse_args() -> argparse.Namespace:
5156
parser.add_argument(
5257
'--test-list-out-path',
5358
help='Write the list of test descriptors from both types of reports to this file.')
59+
parser.add_argument(
60+
'--analysis-out-path',
61+
help='Write the analysis to this file as well as stdout.')
5462
parser.add_argument(
5563
'--verbose',
5664
action='store_true',
@@ -66,8 +74,10 @@ def parse_args() -> argparse.Namespace:
6674

6775
@dataclass
6876
class AnalysisResult:
77+
num_tests_planned: int = 0
6978
num_tests_in_junit_xml: int = 0
7079
num_tests_in_spark: int = 0
80+
num_tests_did_not_run: int = 0
7181

7282
num_failed_tests_in_junit_xml: int = 0
7383
num_failed_tests_in_spark: int = 0
@@ -86,7 +96,7 @@ class AnalysisResult:
8696
num_tests_without_spark_report: int = 0
8797

8898
# Total number of unique test descriptors found across any types of reports (union).
89-
num_total_unique_tests: int = 0
99+
num_unique_test_results: int = 0
90100

91101
# Total number of unique test descriptors found across both types of reports (intersection).
92102
num_unique_tests_present_in_both_report_types: int = 0
@@ -108,6 +118,7 @@ class SingleBuildAnalyzer:
108118

109119
def __init__(self,
110120
aggregated_test_results_path: str,
121+
planned_tests_path: str,
111122
run_tests_on_spark_report_path: str,
112123
archive_dir: Optional[str],
113124
successful_tests_out_path: Optional[str] = None,
@@ -117,16 +128,20 @@ def __init__(self,
117128
results. Look at aggregate_test_reports.py for the format of this dictionary, and at
118129
python/yugabyte/test_data/clang16_debug_centos7_test_report_from_junit_xml.json.gz for
119130
an example.
131+
:param planned_tests_path: Path to the JSON file containing list of tests to be run.
132+
Example: python/yugabyte/test_data/planned_tests.json
120133
:param run_tests_on_spark_report_path: Path to the JSON file containing the full build
121134
report produced by run_tests_on_spark.py. As an example, look at the following file:
122135
python/yugabyte/test_data/clang16_debug_centos7_run_tests_on_spark_full_report.json.gz
123136
"""
124137
logging.info("Reading aggregated JUnit XML test results from %s",
125138
aggregated_test_results_path)
126-
logging.info("Reading full Spark build report from %s", run_tests_on_spark_report_path)
127139
self.test_reports_from_junit_xml = cast(
128140
List[Dict[str, Any]],
129141
json_util.read_json_file(aggregated_test_results_path)['tests'])
142+
logging.info("Reading planned Spark test list from %s", planned_tests_path)
143+
self.planned_tests_list = json_util.read_json_file(planned_tests_path)
144+
logging.info("Reading full Spark build report from %s", run_tests_on_spark_report_path)
130145
self.run_tests_on_spark_report = cast(
131146
Dict[str, SparkTaskReport],
132147
json_util.read_json_file(run_tests_on_spark_report_path)['tests'])
@@ -234,6 +249,10 @@ def analyze(self) -> AnalysisResult:
234249

235250
desc_to_spark_task_report: Dict[SimpleTestDescriptor, SparkTaskReport] = {}
236251

252+
# TODO: This script does not support multiple test repetitions. Test descriptors are
253+
# assumed to be SimpleTestDescriptors with no :::attempt_X suffixes, and
254+
# assertions ensure that the name is not unique in the input report.
255+
# Even if it is not supported, it should gracefully ignore such tests.
237256
failed_tests_in_spark: Set[SimpleTestDescriptor] = set()
238257
for test_desc_str, spark_test_report in self.run_tests_on_spark_report.items():
239258
test_desc = SimpleTestDescriptor.parse(test_desc_str)
@@ -336,19 +355,24 @@ def analyze(self) -> AnalysisResult:
336355
junit_xml_report.get('num_failures', 0) > 0):
337356
failed_tests_in_junit_xml.add(test_desc)
338357

339-
# Compare the set of tests (both successes and failures) for two types of reports.
340-
for test_desc in sorted(
341-
desc_to_spark_task_report.keys() | deduped_junit_reports_dict.keys()):
342-
reports_from_junit_xml = deduped_junit_reports_dict.get(test_desc)
358+
# Compare the spark planned tests to spark & junit results.
359+
result.num_tests_planned = len(self.planned_tests_list)
360+
planned_desc_list = [SimpleTestDescriptor.parse(td_str)
361+
for td_str in self.planned_tests_list]
362+
for test_desc in planned_desc_list:
343363
spark_task_report = desc_to_spark_task_report.get(test_desc)
344-
if reports_from_junit_xml is None:
364+
reports_from_junit_xml = deduped_junit_reports_dict.get(test_desc)
365+
if spark_task_report is None and reports_from_junit_xml is None:
366+
logging.info("Test descriptor %s has no results", test_desc)
367+
result.num_tests_did_not_run += 1
368+
result.num_tests_without_junit_xml_report += 1
369+
result.num_tests_without_spark_report += 1
370+
elif reports_from_junit_xml is None:
345371
logging.info("Test descriptor %s has no reports from JUnit XML files", test_desc)
346372
result.num_tests_without_junit_xml_report += 1
347-
continue
348-
if spark_task_report is None:
373+
elif spark_task_report is None:
349374
logging.info("Test descriptor %s has no report from Spark", test_desc)
350-
result.num_tests_without_spark_report = 1
351-
continue
375+
result.num_tests_without_spark_report += 1
352376

353377
for test_desc in sorted(failed_tests_in_spark):
354378
if test_desc not in failed_tests_in_junit_xml:
@@ -380,8 +404,8 @@ def analyze(self) -> AnalysisResult:
380404

381405
all_test_descs = (set(desc_to_spark_task_report.keys()) |
382406
set(desc_to_test_reports_from_junit_xml.keys()))
383-
result.num_total_unique_tests = len(all_test_descs)
384-
logging.info("Found %d unique tests total" % result.num_total_unique_tests)
407+
result.num_unique_test_results = len(all_test_descs)
408+
logging.info("Found %d unique tests total" % result.num_unique_test_results)
385409

386410
tests_present_in_both = (set(desc_to_spark_task_report.keys()) &
387411
set(desc_to_test_reports_from_junit_xml.keys()))
@@ -395,7 +419,7 @@ def analyze(self) -> AnalysisResult:
395419
test_descriptor.write_test_descriptors_to_file(
396420
self.successful_tests_out_path, successful_tests, 'successful tests')
397421
test_descriptor.write_test_descriptors_to_file(
398-
self.test_list_out_path, all_test_descs, 'all tests')
422+
self.test_list_out_path, planned_desc_list, 'all tests')
399423

400424
return result
401425

@@ -405,14 +429,21 @@ def main() -> None:
405429
common_util.init_logging(verbose=args.verbose)
406430
result = SingleBuildAnalyzer(
407431
args.aggregated_json_test_results,
432+
args.planned_tests,
408433
args.run_tests_on_spark_report,
409434
args.archive_dir,
410435
args.successful_tests_out_path,
411436
args.test_list_out_path
412437
).analyze()
413438

439+
stats = ''
414440
for field in dataclasses.fields(result):
415441
logging.info("%s: %s", field.name, getattr(result, field.name))
442+
stats += f"{field.name}: {getattr(result, field.name)}\n"
443+
444+
if args.analysis_out_path:
445+
logging.info("Writing the analysis stats to %s", args.analysis_out_path)
446+
file_util.write_file(stats, args.analysis_out_path)
416447

417448

418449
if __name__ == '__main__':

python/yugabyte/artifact_upload.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,11 @@ def ensure_dir_exists(dir_path: str) -> None:
258258
try:
259259
if method == UploadMethod.SSH:
260260
assert dest_host is not None
261-
subprocess.check_call(['ssh', dest_host, 'mkdir', '-p', dest_dir])
262-
subprocess.check_call(['scp', artifact_path, f'{dest_host}:{dest_dir}/'])
261+
subprocess.check_call(['ssh', '-o', 'StrictHostKeyChecking=no', dest_host,
262+
'mkdir', '-p', dest_dir])
263+
subprocess.check_call(['scp', '-o', 'StrictHostKeyChecking=no', artifact_path,
264+
f'{dest_host}:{dest_dir}/'])
265+
263266
elif method == UploadMethod.CP:
264267
ensure_dir_exists(dest_dir)
265268
subprocess.check_call(['cp', '-f', artifact_path, dest_path])

python/yugabyte/run_tests_on_spark.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,10 @@ def main() -> None:
12501250
# End of argument validation.
12511251
# ---------------------------------------------------------------------------------------------
12521252

1253-
os.environ['YB_BUILD_HOST'] = socket.gethostname()
1253+
if os.getenv('YB_SPARK_COPY_MODE') == 'SSH':
1254+
os.environ['YB_BUILD_HOST'] = os.environ['USER'] + '@' + socket.gethostname()
1255+
else:
1256+
os.environ['YB_BUILD_HOST'] = socket.gethostname()
12541257
thirdparty_path = build_paths.BuildPaths(args.build_root).thirdparty_path
12551258
assert thirdparty_path is not None
12561259
os.environ['YB_THIRDPARTY_DIR'] = thirdparty_path
@@ -1320,6 +1323,14 @@ def main() -> None:
13201323
for i in range(1, num_repetitions + 1)
13211324
]
13221325

1326+
if args.save_report_to_build_dir:
1327+
planned_report_paths = []
1328+
planned_report_paths.append(os.path.join(global_conf.build_root, 'planned_tests.json'))
1329+
planned = []
1330+
for td in test_descriptors:
1331+
planned.append(td.descriptor_str)
1332+
save_json_to_paths('planned tests', planned, planned_report_paths, should_gzip=False)
1333+
13231334
app_name_details = ['{} tests total'.format(total_num_tests)]
13241335
if num_repetitions > 1:
13251336
app_name_details += ['{} repetitions of {} tests'.format(num_repetitions, num_tests)]

python/yugabyte/test_analyze_test_results.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ def test_analyze_test_results() -> None:
2222
test_data_base_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_data')
2323
aggregated_test_report_path = os.path.join(
2424
test_data_base_dir, 'clang16_debug_centos7_test_report_from_junit_xml.json')
25+
planned_tests = os.path.join(
26+
test_data_base_dir, 'planned_tests.json')
2527
run_tests_on_spark_full_report = os.path.join(
2628
test_data_base_dir,
2729
'clang16_debug_centos7_run_tests_on_spark_full_report.json')
2830
analyzer = analyze_test_results.SingleBuildAnalyzer(
2931
aggregated_test_report_path,
32+
planned_tests,
3033
run_tests_on_spark_full_report,
3134
archive_dir=None)
3235
result = analyzer.analyze()
@@ -35,15 +38,17 @@ def test_analyze_test_results() -> None:
3538
analyze_test_results.AnalysisResult(
3639
num_tests_in_junit_xml=384,
3740
num_tests_in_spark=385,
41+
num_tests_planned=387,
42+
num_tests_did_not_run=2,
3843
num_failed_tests_in_junit_xml=2,
3944
num_failed_tests_in_spark=2,
4045
num_unique_failed_tests=3,
4146
num_dedup_errors_in_junit_xml=0,
42-
num_total_unique_tests=387,
47+
num_unique_test_results=387,
4348
num_tests_failed_in_spark_but_not_junit_xml=1,
4449
num_tests_failed_in_junit_xml_but_not_spark=1,
45-
num_tests_without_junit_xml_report=3,
46-
num_tests_without_spark_report=1,
50+
num_tests_without_junit_xml_report=5,
51+
num_tests_without_spark_report=2,
4752
num_successful_tests=379,
4853
num_unique_tests_present_in_both_report_types=382
4954
))

0 commit comments

Comments
 (0)