Skip to content

Commit 28e849b

Browse files
Run spec tests in parallel to reduce the execution time (#8088)
Reduces runtime from ~15 minutes to 1.5 minutes on my machine * Run tests through a thread pool with `os.cpu_count()` threads * `os.cpu_count() * 4` shows no benefit. `os.cpu_count() // 2` also shows no regression in runtime, but might be worse for machines with fewer cores. There are currently 315 spec tests total for reference. * Prefixes round-trip file name tests with their test name to avoid clobbering the a.wasm / ab.wast files during tests * Add stdout and stderr params to functions that print so that lines can be captured by each thread and not interleaved * Note that we pass stdout as the stderr param in practice so that they are interleaved; otherwise all stdout lines and stderr lines would be output together in each test.
1 parent 3ff3762 commit 28e849b

File tree

3 files changed

+179
-85
lines changed

3 files changed

+179
-85
lines changed

check.py

Lines changed: 126 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
import sys
2121
import unittest
2222
from collections import OrderedDict
23+
from concurrent.futures import ThreadPoolExecutor
24+
from pathlib import Path
25+
import queue
26+
import io
27+
import threading
28+
from functools import partial
2329

2430
from scripts.test import binaryenjs
2531
from scripts.test import lld
@@ -175,73 +181,129 @@ def run_wasm_reduce_tests():
175181
assert after < 0.85 * before, [before, after]
176182

177183

178-
def run_spec_tests():
179-
print('\n[ checking wasm-shell spec testcases... ]\n')
184+
def run_spec_test(wast, stdout=None, stderr=None):
    """Run one spec .wast through wasm-shell and return its filtered output.

    stdout is an optional stream for progress logging (lets parallel runners
    capture per-test output); stderr is accepted only so all spec-test helpers
    share a uniform signature — the shell's stderr is always piped here so the
    interpreter's noise can be filtered out of the result.
    """
    cmd = shared.WASM_SHELL + [wast]
    output = support.run_command(cmd, stdout=stdout, stderr=subprocess.PIPE)
    # drop binaryen interpreter '[trap ...]' logging, which the spec suite's
    # expected output does not contain
    kept = (line for line in output.splitlines() if not line.startswith('[trap'))
    return '\n'.join(kept) + '\n'
180191

181-
for wast in shared.options.spec_tests:
182-
base = os.path.basename(wast)
183-
print('..', base)
184-
# windows has some failures that need to be investigated
185-
if base == 'names.wast' and shared.skip_if_on_windows('spec: ' + base):
186-
continue
187192

188-
def run_spec_test(wast):
189-
cmd = shared.WASM_SHELL + [wast]
190-
output = support.run_command(cmd, stderr=subprocess.PIPE)
191-
# filter out binaryen interpreter logging that the spec suite
192-
# doesn't expect
193-
filtered = [line for line in output.splitlines() if not line.startswith('[trap')]
194-
return '\n'.join(filtered) + '\n'
195-
196-
def run_opt_test(wast):
197-
# check optimization validation
198-
cmd = shared.WASM_OPT + [wast, '-O', '-all', '-q']
199-
support.run_command(cmd)
193+
def run_opt_test(wast, stdout=None, stderr=None):
    """Check that the optimizer handles `wast` (-O -all) without erroring.

    stdout is an optional stream for command logging; stderr is accepted only
    for signature uniformity with the other spec-test helpers and is unused.
    """
    # optimization-validation check
    support.run_command(shared.WASM_OPT + [wast, '-O', '-all', '-q'], stdout=stdout)
197+
198+
199+
def check_expected(actual, expected, stdout=None):
    """Compare test output against an expected-output log file.

    actual   -- the test's output text
    expected -- path to an expected-output .log file; when falsy or the file
                does not exist, no comparison is performed
    stdout   -- optional stream for progress messages, so parallel runners can
                capture each test's lines without interleaving

    Calls shared.fail on a mismatch (which raises / aborts the test run).
    """
    if expected and os.path.exists(expected):
        # read via a context manager so the handle is closed promptly (the
        # original left the file object to be collected lazily)
        with open(expected) as f:
            expected_text = f.read()
        print(' (using expected output)', file=stdout)
        # compare whitespace-insensitively at the ends
        actual = actual.strip()
        expected_text = expected_text.strip()
        if actual != expected_text:
            shared.fail(actual, expected_text)
207+
208+
209+
def run_one_spec_test(wast: Path, stdout=None, stderr=None):
    """Run a single wasm-shell spec test, including the binary round-trip check.

    wast   -- path to the spec .wast file
    stdout -- optional stream all progress lines are written to, so a thread
              pool can capture each test's output as one chunk
    stderr -- forwarded to the helpers (in practice callers pass the stdout
              buffer here too, so the two streams interleave chronologically)
    """
    test_name = wast.name

    # Unique per-test prefix for scratch files, so parallel tests do not
    # clobber each other's a.wasm / ab.wast outputs:
    # /path/to/binaryen/test/spec/foo.wast -> test-spec-foo
    base_name = "-".join(wast.relative_to(Path(shared.options.binaryen_root)).with_suffix("").parts)

    print('..', test_name, file=stdout)
    # windows has some failures that need to be investigated
    if test_name == 'names.wast' and shared.skip_if_on_windows('spec: ' + test_name):
        return

    expected = os.path.join(shared.get_test_dir('spec'), 'expected-output', test_name + '.log')

    # some spec tests should fail (actual process failure, not just assert_invalid)
    try:
        actual = run_spec_test(str(wast), stdout=stdout, stderr=stderr)
    except Exception as e:
        if ('wasm-validator error' in str(e) or 'error: ' in str(e)) and '.fail.' in test_name:
            print('<< test failed as expected >>', file=stdout)
            return  # don't try all the binary format stuff TODO
        else:
            shared.fail_with_error(str(e))

    check_expected(actual, expected, stdout=stdout)

    # check binary format. here we can verify execution of the final
    # result, no need for an output verification
    transformed_path = base_name + ".transformed"
    with open(transformed_path, 'w') as transformed_spec_file:
        for i, (module, asserts) in enumerate(support.split_wast(str(wast))):
            if not module:
                # Skip any initial assertions that don't have a module
                continue
            print(f' testing split module {i}', file=stdout)
            split_name = base_name + f'_split{i}.wast'
            support.write_wast(split_name, module)
            run_opt_test(split_name, stdout=stdout, stderr=stderr)  # also that our optimizer doesn't break on it

            result_wast_file = shared.binary_format_check(split_name, verify_final_result=False, base_name=base_name, stdout=stdout, stderr=stderr)
            with open(result_wast_file) as f:
                result_wast = f.read()
            # add the asserts, and verify that the test still passes
            transformed_spec_file.write(result_wast + '\n' + '\n'.join(asserts))

    # compare all the outputs to the expected output; reuse the path computed
    # above instead of rebuilding it (the original also had a dead
    # `actual = ''` assignment here, removed)
    actual = run_spec_test(transformed_path, stdout=stdout, stderr=stderr)
    check_expected(actual, expected, stdout=stdout)
257+
258+
259+
def run_spec_test_with_wrapped_stdout(output_queue, wast: Path):
    """Run one spec test with all of its output buffered into a single string.

    The buffered text is pushed onto output_queue exactly once — even when the
    test raises — so a dedicated printer thread can emit each test's lines as
    one uninterleaved chunk.
    """
    buffer = io.StringIO()
    try:
        # the same buffer serves as stderr so both streams interleave in order
        result = run_one_spec_test(wast, stdout=buffer, stderr=buffer)
    except Exception as exc:
        print(exc, file=buffer)
        raise
    finally:
        # If a test fails, it's important to keep its output
        output_queue.put(buffer.getvalue())
    return result
270+
271+
272+
def run_spec_tests():
    """Run all wasm-shell spec tests in parallel via a thread pool.

    Each worker buffers its test's output and hands the finished chunk to a
    single printer thread, so lines from concurrent tests never interleave.
    """
    print('\n[ checking wasm-shell spec testcases... ]\n')

    output_queue = queue.Queue()

    # unique sentinel telling the printer thread to exit
    stop_printer = object()

    def printer():
        while True:
            string = output_queue.get()
            if string is stop_printer:
                break
            print(string, end="")

    printing_thread = threading.Thread(target=printer)
    printing_thread.start()

    worker_count = os.cpu_count()
    print("Running with", worker_count, "workers")
    executor = ThreadPoolExecutor(max_workers=worker_count)
    try:
        results = executor.map(partial(run_spec_test_with_wrapped_stdout, output_queue), map(Path, shared.options.spec_tests))
        for _ in results:
            # Iterating joins the threads and re-raises worker exceptions.
            # No return value here.
            pass
    except KeyboardInterrupt:
        # Hard exit to avoid threads continuing to run after Ctrl-C.
        # There's no concern of deadlocking during shutdown here.
        os._exit(1)
    finally:
        executor.shutdown(cancel_futures=True)
        # Fix: stop and join the printer in the finally block. Previously a
        # test failure propagating out of the try skipped these lines, leaving
        # the non-daemon printer thread alive and hanging the process.
        output_queue.put(stop_printer)
        printing_thread.join()
245307

246308

247309
def run_validator_tests():

scripts/test/shared.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -519,34 +519,37 @@ def _can_run_spec_test(test):
519519

520520

521521
def binary_format_check(wast, verify_final_result=True, wasm_as_args=None,
                        binary_suffix='.fromBinary', base_name=None, stdout=None, stderr=None):
    """Round-trip `wast` through the binary format and back, validating each step.

    wast                -- path to the text-format module to check
    verify_final_result -- also diff the disassembly against `wast + binary_suffix`
    wasm_as_args        -- extra wasm-as flags (defaults to ['-g']); None avoids
                           the mutable-default-argument pitfall of the original
    binary_suffix       -- suffix of the expected disassembly file
    base_name           -- optional prefix for the scratch a.wasm / ab.wast files
                           so parallel tests don't clobber each other
    stdout              -- optional stream for command logging
    stderr              -- unused; accepted for call-site uniformity

    Returns the path of the disassembled wast file.
    """
    if wasm_as_args is None:
        wasm_as_args = ['-g']

    as_file = f"{base_name}-a.wasm" if base_name is not None else "a.wasm"
    disassembled_file = f"{base_name}-ab.wast" if base_name is not None else "ab.wast"

    print(' (binary format check)', file=stdout)
    cmd = WASM_AS + [wast, '-o', as_file, '-all'] + wasm_as_args
    print(' ', ' '.join(cmd), file=stdout)
    if os.path.exists(as_file):
        os.unlink(as_file)
    subprocess.check_call(cmd, stdout=subprocess.PIPE)
    assert os.path.exists(as_file)

    cmd = WASM_DIS + [as_file, '-o', disassembled_file, '-all']
    print(' ', ' '.join(cmd), file=stdout)
    if os.path.exists(disassembled_file):
        os.unlink(disassembled_file)
    subprocess.check_call(cmd, stdout=subprocess.PIPE)
    assert os.path.exists(disassembled_file)

    # make sure it is a valid wast
    cmd = WASM_OPT + [disassembled_file, '-all', '-q']
    print(' ', ' '.join(cmd), file=stdout)
    subprocess.check_call(cmd, stdout=subprocess.PIPE)

    if verify_final_result:
        # use a context manager so the handle is closed promptly
        with open(disassembled_file) as f:
            actual = f.read()
        fail_if_not_identical_to_file(actual, wast + binary_suffix)

    return disassembled_file
550553

551554

552555
def minify_check(wast, verify_final_result=True):

scripts/test/support.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import io
1516
import filecmp
1617
import os
1718
import re
@@ -185,16 +186,44 @@ def write_wast(filename, wast, asserts=[]):
185186
o.write(wast + '\n'.join(asserts))
186187

187188

188-
def run_command(cmd, expected_status=0, stderr=None,
189+
# Hack to allow subprocess.Popen with stdout/stderr to StringIO, which doesn't have a fileno and doesn't work otherwise
190+
def _process_communicate(*args, **kwargs):
    """Run a subprocess, allowing io.StringIO objects as stdout/stderr targets.

    Hack: StringIO has no fileno() and cannot be handed to subprocess.Popen
    directly, so each StringIO is swapped for subprocess.PIPE and the captured
    text is copied into it after the process finishes.

    Returns (out, err, returncode) from the completed process.
    """
    stdout_sink = kwargs.get("stdout")
    stderr_sink = kwargs.get("stderr")
    capture_stdout = isinstance(stdout_sink, io.StringIO)
    capture_stderr = isinstance(stderr_sink, io.StringIO)

    if capture_stdout:
        kwargs["stdout"] = subprocess.PIPE
    if capture_stderr:
        kwargs["stderr"] = subprocess.PIPE

    proc = subprocess.Popen(*args, **kwargs)
    out, err = proc.communicate()

    if capture_stdout:
        stdout_sink.write(out)
    if capture_stderr:
        stderr_sink.write(err)

    return out, err, proc.returncode
210+
211+
212+
def run_command(cmd, expected_status=0, stdout=None, stderr=None,
189213
expected_err=None, err_contains=False, err_ignore=None):
214+
'''
215+
stderr - None, subprocess.PIPE, subprocess.STDOUT or a file handle / io.StringIO to write stdout to
216+
stdout - File handle to print debug messages to
217+
returns the process's stdout
218+
'''
190219
if expected_err is not None:
191220
assert stderr == subprocess.PIPE or stderr is None, \
192221
"Can't redirect stderr if using expected_err"
193222
stderr = subprocess.PIPE
194-
print('executing: ', ' '.join(cmd))
195-
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=stderr, universal_newlines=True, encoding='UTF-8')
196-
out, err = proc.communicate()
197-
code = proc.returncode
223+
print('executing: ', ' '.join(cmd), file=stdout)
224+
225+
out, err, code = _process_communicate(cmd, stdout=subprocess.PIPE, stderr=stderr, universal_newlines=True, encoding='UTF-8')
226+
198227
if expected_status is not None and code != expected_status:
199228
raise Exception(f"run_command `{' '.join(cmd)}` failed ({code}) {err or ''}")
200229
if expected_err is not None:

0 commit comments

Comments
 (0)