Enabled memtier_benchmark SPEC (#307)

filipecosta90 · web-flow · commit b79e7a84f215 · 2022-03-09T19:45:06.000Z
* In case of a remote RDB fetch and a remote run, copy directly from the remote file to the DB machine (removes the extra hop)

* Only call check_if_needs_remote_fetch when remote is False

* Only call check_if_needs_remote_fetch when remote is False

* Enabled memtier_benchmark SPEC

* Added test for extract_benchmark_type_from_config()

* flake8 check fix on run_remote_client_tool()

* Bumping version to 0.7.7

* Added memtier_benchmark to allowed tools list

* Fixed test_prepare_ann_benchmark_command() missing args test
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "redisbench-admin"
-version = "0.7.6"
+version = "0.7.7"
 description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
 authors = ["filipecosta90 <filipecosta.90@gmail.com>","Redis Performance Group <performance@redis.com>"]
 readme = "README.md"
diff --git a/redisbench_admin/run/args.py b/redisbench_admin/run/args.py
@@ -60,7 +60,7 @@ def common_run_args(parser):
     parser.add_argument(
         "--allowed-tools",
         type=str,
-        default="redis-benchmark,redisgraph-benchmark-go,ycsb,"
+        default="memtier_benchmark,redis-benchmark,redisgraph-benchmark-go,ycsb,"
         + "tsbs_run_queries_redistimeseries,tsbs_load_redistimeseries,"
         + "ftsb_redisearch,"
         + "aibench_run_inference_redisai_vision,ann-benchmarks",
diff --git a/redisbench_admin/run/memtier_benchmark/memtier_benchmark.py b/redisbench_admin/run/memtier_benchmark/memtier_benchmark.py
@@ -7,7 +7,7 @@
 import logging
 import re
 import subprocess
-
+import shlex
 from redisbench_admin.utils.remote import execute_remote_commands
 
 
@@ -26,13 +26,16 @@ def prepare_memtier_benchmark_command(
 
     if cluster_api_enabled:
         command_arr.extend(["--cluster-mode"])
-
-    for k in benchmark_config["parameters"]:
-        for kk in k.keys():
-            command_arr.extend(["--{}".format(kk), str(k[kk])])
+    if "parameters" in benchmark_config:
+        for k in benchmark_config["parameters"]:
+            for kk in k.keys():
+                command_arr.extend(["--{}".format(kk), str(k[kk])])
 
     command_arr.extend(["--json-out-file", result_file])
     command_str = " ".join(command_arr)
+    if "arguments" in benchmark_config:
+        command_str = command_str + " " + benchmark_config["arguments"]
+        command_arr.extend(shlex.split(benchmark_config["arguments"]))
     return command_arr, command_str
 
 
diff --git a/redisbench_admin/run_remote/remote_client.py b/redisbench_admin/run_remote/remote_client.py
@@ -16,7 +16,10 @@
     post_process_remote_run,
 )
 from redisbench_admin.utils.benchmark_config import extract_benchmark_tool_settings
-from redisbench_admin.utils.redisgraph_benchmark_go import setup_remote_benchmark_ann
+from redisbench_admin.utils.redisgraph_benchmark_go import (
+    setup_remote_benchmark_ann,
+    get_redisbench_admin_remote_path,
+)
 from redisbench_admin.utils.remote import (
     execute_remote_commands,
     fetch_file_from_remote_setup,
@@ -100,6 +103,34 @@ def run_remote_client_tool(
     if benchmark_tool == "redis-benchmark":
         tmp = local_bench_fname
         local_bench_fname = "result.csv"
+    commands = [command_str]
+    local_output_artifacts = []
+    remote_output_artifacts = []
+    if "ann" in benchmark_tool:
+        [recv_exit_status, stdout, stderr] = get_redisbench_admin_remote_path(
+            client_public_ip, username, private_key, client_ssh_port
+        )[0]
+        pkg_path = stdout[0].strip()
+        benchmark_suffix = local_bench_fname[: len(local_bench_fname) - 5]
+        create_website_path = pkg_path + "/run/ann/pkg/"
+        logging.info("Remote create website path: {}".format(create_website_path))
+        website_outputdir = "/tmp/website-{}".format(benchmark_suffix)
+        website_outputdir_zip = "/tmp/website-{}.zip".format(benchmark_suffix)
+        mkdir_command = "mkdir -p {}".format(website_outputdir)
+        create_website_command = (
+            "cd {} && sudo python3 create_website.py --outputdir {}".format(
+                create_website_path, website_outputdir
+            )
+        )
+        zip_website_command = "zip -r {} {}".format(
+            website_outputdir_zip, website_outputdir
+        )
+        commands.append(mkdir_command)
+        commands.append(create_website_command)
+        commands.append(zip_website_command)
+        local_output_artifacts.append(website_outputdir_zip)
+        remote_output_artifacts.append(website_outputdir_zip)
+
     benchmark_start_time = datetime.datetime.now()
     # run the benchmark
     remote_run_result, stdout, _ = run_remote_benchmark(
@@ -108,7 +139,7 @@ def run_remote_client_tool(
         private_key,
         remote_results_file,
         local_bench_fname,
-        command_str,
+        commands,
         client_ssh_port,
     )
     benchmark_end_time = datetime.datetime.now()
@@ -159,40 +190,74 @@ def run_remote_benchmark(
     client_public_ip,
     username,
     private_key,
-    remote_results_file,
-    local_results_file,
-    command,
+    remote_results_files,
+    local_results_files,
+    commands,
     ssh_port=22,
 ):
     remote_run_result = False
     res = execute_remote_commands(
-        client_public_ip, username, private_key, [command], ssh_port
+        client_public_ip, username, private_key, commands, ssh_port
     )
-    recv_exit_status, stdout, stderr = res[0]
+    recv_exit_status, _, _ = res[0]
 
     if recv_exit_status != 0:
         logging.error(
             "Exit status of remote command execution {}. Printing stdout and stderr".format(
                 recv_exit_status
             )
         )
-        logging.error("remote process stdout: {}".format(stdout))
-        logging.error("remote process stderr: {}".format(stderr))
+        stderr, stdout = print_commands_outputs(commands, True, res)
     else:
         logging.info(
             "Remote process exited normally. Exit code {}. Printing stdout.".format(
                 recv_exit_status
             )
         )
-        logging.info("remote process stdout: {}".format(stdout))
+        stderr, stdout = print_commands_outputs(commands, False, res)
+
         logging.info("Extracting the benchmark results")
         remote_run_result = True
-        if "ycsb" not in command:
-            fetch_file_from_remote_setup(
-                client_public_ip,
-                username,
-                private_key,
-                local_results_file,
-                remote_results_file,
-            )
+        if "ycsb" not in commands[0]:
+            if type(local_results_files) == str:
+                local_results_file = local_results_files
+                remote_results_file = remote_results_files
+                fetch_file_from_remote_setup(
+                    client_public_ip,
+                    username,
+                    private_key,
+                    local_results_file,
+                    remote_results_file,
+                )
+            if type(local_results_files) == list:
+                assert len(local_results_files) == len(remote_results_files)
+                for pos, local_results_file in enumerate(local_results_files):
+                    remote_results_file = remote_results_files[pos]
+                    fetch_file_from_remote_setup(
+                        client_public_ip,
+                        username,
+                        private_key,
+                        local_results_file,
+                        remote_results_file,
+                    )
     return remote_run_result, stdout, stderr
+
+
+def print_commands_outputs(commands, print_err, res):
+    """Log every remote command's result and return the benchmark's output.
+
+    Args:
+        commands: the commands that were executed remotely; commands[0] is
+            the benchmark command itself.
+        print_err: when True, each command's stderr is printed as well.
+        res: one (exit_status, stdout, stderr) tuple per executed command,
+            indexed in the same order as ``commands``.
+
+    Returns:
+        (stderr, stdout) of the first command, or ("", "") if ``res`` is empty.
+    """
+    bench_stdout = ""
+    bench_stderr = ""
+    for pos, (recv_exit_status, stdout, stderr) in enumerate(res):
+        if pos == 0:
+            # The first command is the benchmark run: keep its streams so the
+            # caller can post-process them (previously a no-op self-assignment
+            # left the returned values permanently empty).
+            bench_stderr, bench_stdout = stderr, stdout
+        logging.info(
+            "Exit status for command {}: {}".format(commands[pos], recv_exit_status)
+        )
+        logging.info("\tremote process stdout:")
+        for line in stdout:
+            print(line.strip())
+        if print_err:
+            logging.error("\tremote process stderr:")
+            for line in stderr:
+                print(line.strip())
+    return bench_stderr, bench_stdout
diff --git a/redisbench_admin/run_remote/run_remote.py b/redisbench_admin/run_remote/run_remote.py
@@ -186,6 +186,10 @@ def run_remote_command_logic(args, project_name, project_version):
     profiler_dashboard_table_headers = ["Setup", "Test-case", "Grafana Dashboard"]
     profiler_dashboard_links = []
 
+    benchmark_artifacts_table_name = "Benchmark client artifacts"
+    benchmark_artifacts_table_headers = ["Setup", "Test-case", "Artifact", "link"]
+    benchmark_artifacts_links = []
+
     # contains the overall target-tables ( if any target is defined )
     overall_tables = {}
 
@@ -762,6 +766,15 @@ def run_remote_command_logic(args, project_name, project_version):
                                         test_name
                                     )
                                 )
+
+    if len(benchmark_artifacts_links) > 0:
+        writer = MarkdownTableWriter(
+            table_name=benchmark_artifacts_table_name,
+            headers=benchmark_artifacts_table_headers,
+            value_matrix=benchmark_artifacts_links,
+        )
+        writer.write_table()
+
     if args.enable_profilers:
         writer = MarkdownTableWriter(
             table_name=profiler_dashboard_table_name,
diff --git a/redisbench_admin/utils/benchmark_config.py b/redisbench_admin/utils/benchmark_config.py
@@ -301,10 +301,16 @@ def extract_benchmark_type_from_config(
     benchmark_config_present = False
     benchmark_type = None
     if config_key in benchmark_config:
-        benchmark_config_present = True
-        for entry in benchmark_config[config_key]:
-            if benchmark_type_key in entry:
-                benchmark_type = entry[benchmark_type_key]
+
+        if type(benchmark_config[config_key]) == list:
+            for entry in benchmark_config[config_key]:
+                if benchmark_type_key in entry:
+                    benchmark_type = entry[benchmark_type_key]
+                    benchmark_config_present = True
+        elif type(benchmark_config[config_key]) == dict:
+            if benchmark_type_key in benchmark_config[config_key]:
+                benchmark_type = benchmark_config[config_key][benchmark_type_key]
+                benchmark_config_present = True
     if benchmark_type is None:
         logging.info(
             "Given the '{}' info was not present on {} we will assume the most inclusive default: '{}'".format(
diff --git a/tests/test_ann.py b/tests/test_ann.py
@@ -21,6 +21,7 @@ def test_prepare_ann_benchmark_command():
             benchmark_config["clientconfig"],
             "result.json",
             ".",
+            "ann",
         )
         assert (
             " ".join(command_arr[3:])
diff --git a/tests/test_benchmark_config.py b/tests/test_benchmark_config.py
@@ -6,6 +6,7 @@
     results_dict_kpi_check,
     check_required_modules,
     extract_redis_dbconfig_parameters,
+    extract_benchmark_type_from_config,
 )
 
 
@@ -113,3 +114,21 @@ def test_extract_redis_configuration_parameters():
             "redistimeseries": {"CHUNK_SIZE_BYTES": 128}
         }
         assert dbconfig_present == True
+
+
+def test_extract_benchmark_type_from_config():
+    with open("./tests/test_data/vecsim-memtier.yml", "r") as yml_file:
+        benchmark_config = yaml.safe_load(yml_file)
+        benchmark_config_present, benchmark_type = extract_benchmark_type_from_config(
+            benchmark_config
+        )
+        assert benchmark_type == "read-only"
+        assert benchmark_config_present == True
+
+    with open("./tests/test_data/redis-benchmark.yml", "r") as yml_file:
+        benchmark_config = yaml.safe_load(yml_file)
+        benchmark_config_present, benchmark_type = extract_benchmark_type_from_config(
+            benchmark_config
+        )
+        assert benchmark_type == "mixed"
+        assert benchmark_config_present == False
diff --git a/tests/test_data/vecsim-memtier.yml b/tests/test_data/vecsim-memtier.yml
@@ -0,0 +1,16 @@
+
+name: "vecsim_hybrid_HNSW_05"
+description: "hybrid hnsw with 0.5% filtered results"
+remote:
+ - type: oss-standalone
+ - setup: redisearch-m5d
+dbconfig:
+  - dataset_name: "hybrid-glove100-index-1"
+  - dataset: "https://s3.amazonaws.com/benchmarks.redislabs/redisearch/datasets/vecsim/hybrid-glove100-index-1.rdb"
+  - dataset_load_timeout_secs: 1800
+setups:
+  - oss-standalone
+clientconfig:
+  benchmark_type: "read-only"
+  tool: memtier_benchmark
+  arguments: "--command \"FT.SEARCH idx 'text0=>[KNN $k @hnsw_vector $BLOB]' PARAMS 4 k 10 BLOB aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" --test-time 180 -c 8 -t 2 --hide-histogram"
diff --git a/tests/test_memtier_benchmark.py b/tests/test_memtier_benchmark.py
@@ -0,0 +1,32 @@
+#  BSD 3-Clause License
+#
+#  Copyright (c) 2022., Redis Labs Modules
+#  All rights reserved.
+#
+import yaml
+
+from redisbench_admin.run.memtier_benchmark.memtier_benchmark import (
+    prepare_memtier_benchmark_command,
+)
+
+
+def test_prepare_memtier_benchmark_command():
+    with open("./tests/test_data/vecsim-memtier.yml", "r") as yml_file:
+        benchmark_config = yaml.safe_load(yml_file)
+        command_arr, command_str = prepare_memtier_benchmark_command(
+            "memtier_benchmark",
+            "localhost",
+            "6380",
+            benchmark_config["clientconfig"],
+            False,
+            "result.json",
+        )
+        assert (
+            command_str
+            == "memtier_benchmark -s localhost -p 6380 --hide-histogram --json-out-file result.json --command \"FT.SEARCH idx 'text0=>[KNN $k @hnsw_vector $BLOB]' PARAMS 4 k 10 BLOB aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\" --test-time 180 -c 8 -t 2 --hide-histogram"
+        )
+        assert (
+            command_arr[9]
+            == "FT.SEARCH idx 'text0=>[KNN $k @hnsw_vector $BLOB]' PARAMS 4 k 10 BLOB aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+        )
+        assert len(command_arr) == 17
diff --git a/tests/test_remote_client.py b/tests/test_remote_client.py
@@ -6,7 +6,6 @@
 import yaml
 
 from redisbench_admin.run.common import prepare_benchmark_parameters
-from redisbench_admin.run_remote.remote_client import run_remote_benchmark
 from redisbench_admin.utils.benchmark_config import extract_benchmark_tool_settings
 
 

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@ def test_prepare_ann_benchmark_command():`
`21`	`21`	`benchmark_config["clientconfig"],`
`22`	`22`	`"result.json",`
`23`	`23`	`".",`
	`24`	`+ "ann",`
`24`	`25`	`)`
`25`	`26`	`assert (`
`26`	`27`	`" ".join(command_arr[3:])`