Skip to content

Commit

Permalink
Updated watchdog to avoid crash on missing Tags (#423)
Browse files Browse the repository at this point in the history
* Added support for spot instance setups

* Fixed CI tests based upon latest changes of tools

* Updating spot/full-price counter

* Further improved the comparison string generated between baseline/comparison data

* Updated watchdog to avoid crash on missing Tags
  • Loading branch information
filipecosta90 authored Jan 31, 2024
1 parent 7405674 commit c3eccd6
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ jobs:
pytest:
strategy:
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10' ]
python-version: [ '3.9', '3.10' ]
fail-fast: false
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
USING_COVERAGE: "3.10"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "redisbench-admin"
version = "0.10.17"
version = "0.10.19"
description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
authors = ["filipecosta90 <filipecosta.90@gmail.com>","Redis Performance Group <performance@redis.com>"]
readme = "README.md"
Expand Down
9 changes: 7 additions & 2 deletions redisbench_admin/compare/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def get_by_strings(
baseline_str = baseline_branch
if comparison_branch is not None:
comparison_covered = True
by_str_baseline = "branch"
by_str_comparison = "branch"
comparison_str = comparison_branch

if baseline_tag is not None:
Expand Down Expand Up @@ -960,7 +960,12 @@ def check_multi_value_filter(baseline_str):


def prepare_value_str(baseline_pct_change, baseline_v, baseline_values, simplify_table):
baseline_v_str = " {:.0f}".format(baseline_v)
if baseline_v < 1.0:
baseline_v_str = " {:.2f}".format(baseline_v)
elif baseline_v < 10.0:
baseline_v_str = " {:.1f}".format(baseline_v)
else:
baseline_v_str = " {:.0f}".format(baseline_v)
stamp_b = ""
if baseline_pct_change > 10.0:
stamp_b = "UNSTABLE "
Expand Down
8 changes: 7 additions & 1 deletion redisbench_admin/run_remote/remote_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def remote_env_setup(
tf_override_name=None,
tf_folder_path=None,
spot_instance_error=False,
spot_price_counter=0,
full_price_counter=0,
):
server_plaintext_port = args.db_port
db_ssh_port = args.db_ssh_port
Expand Down Expand Up @@ -85,6 +87,7 @@ def remote_env_setup(
tf_folder_spot_path,
)
spot_available_and_used = True
spot_price_counter = spot_price_counter + 1
except TerraformCommandError as error:
spot_instance_error = True
logging.error(
Expand All @@ -95,7 +98,7 @@ def remote_env_setup(
pass
else:
logging.warning(
f"Even though there is a spot instance config, avoiding deploying it."
"Even though there is a spot instance config, avoiding deploying it."
)
if spot_available_and_used is False:
(
Expand All @@ -121,6 +124,7 @@ def remote_env_setup(
tf_override_name,
tf_folder_path,
)
full_price_counter = full_price_counter + 1
logging.info("Using the following connection addresses.")
logging.info(f"client_public_ip={client_public_ip}")
logging.info(f"server_public_ip={server_public_ip}")
Expand All @@ -134,4 +138,6 @@ def remote_env_setup(
client_ssh_port,
username,
spot_instance_error,
spot_price_counter,
full_price_counter,
)
8 changes: 6 additions & 2 deletions redisbench_admin/run_remote/remote_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ def post_process_remote_run(
start_time_str,
stdout,
)
with open(local_benchmark_output_filename, "r") as json_file:
results_dict = json.load(json_file)
try:
with open(local_benchmark_output_filename, "r") as json_file:
results_dict = json.load(json_file)
except json.decoder.JSONDecodeError as e:
logging.error("Received error while decoding JSON: {}".format(e.__str__()))
pass
# check KPIs
return_code = results_dict_kpi_check(benchmark_config, results_dict, return_code)
# if the benchmark tool is redisgraph-benchmark-go and
Expand Down
31 changes: 29 additions & 2 deletions redisbench_admin/run_remote/run_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def run_remote_command_logic(args, project_name, project_version):

# Used to only deploy spot once per run
spot_instance_error = False
ts_key_spot_price = f"ts:{tf_triggering_env}:tests:spot_price"
ts_key_full_price = f"ts:{tf_triggering_env}:tests:full_price"

for benchmark_type, bench_by_dataset_map in benchmark_runs_plan.items():
if return_code != 0 and args.fail_fast:
Expand Down Expand Up @@ -418,6 +420,8 @@ def run_remote_command_logic(args, project_name, project_version):
client_ssh_port,
username,
spot_instance_error,
spot_price_counter,
full_price_counter,
) = remote_env_setup(
args,
benchmark_config,
Expand All @@ -435,16 +439,37 @@ def run_remote_command_logic(args, project_name, project_version):
TF_OVERRIDE_NAME,
TF_OVERRIDE_REMOTE,
spot_instance_error,
0,
0,
)

# after we've created the env, even on error we should always teardown
# in case of some unexpected error we fail the test
try:
(
_,
_,
start_time_setup_ms,
testcase_start_time_str,
) = get_start_time_vars()
if args.push_results_redistimeseries:
logging.info(
f"Updating overall spot price tests counter {ts_key_spot_price}"
)
rts.ts().add(
ts_key_spot_price,
start_time_setup_ms,
spot_price_counter,
duplicate_policy="sum",
)
logging.info(
f"Updating overall spot price full counter {ts_key_spot_price}"
)
rts.ts().add(
ts_key_full_price,
start_time_setup_ms,
full_price_counter,
duplicate_policy="sum",
)
logname = "{}_{}.log".format(
test_name, testcase_start_time_str
)
Expand Down Expand Up @@ -871,7 +896,9 @@ def run_remote_command_logic(args, project_name, project_version):
)
return_code |= 1
raise Exception(
"Failed to run remote benchmark."
"Failed to run remote benchmark. {}".format(
e.__str__()
)
)

if setup_details["env"] is None:
Expand Down
1 change: 1 addition & 0 deletions redisbench_admin/utils/benchmark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def parse_exporter_timemetric(metric_path: str, results_dict: dict):
logging.error(
"Unable to parse time-metric {}. Error: {}".format(metric_path, e.__str__())
)
pass
return datapoints_timestamp


Expand Down
2 changes: 0 additions & 2 deletions redisbench_admin/utils/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,8 +578,6 @@ def fetch_remote_setup_from_config(
setup_type = remote_setup_property["type"]
if "setup" in remote_setup_property:
setup = remote_setup_property["setup"]
if "spot_instance" in remote_setup_property:
spot_path = "/terraform/" + remote_setup_property["spot_instance"]
# fetch terraform folder
path = "/terraform/{}-{}".format(setup_type, setup)
terraform_working_dir = common_tf(branch, path, repo)
Expand Down
18 changes: 10 additions & 8 deletions redisbench_admin/watchdog/watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ def get_ci_ec2_instances_by_state(ec2_client, ci_machines_prefix, requested_stat
def get_vname_timeout_secs(instance):
vm_name = ""
timeout_secs = None
for tag_dict in instance["Tags"]:
key = tag_dict["Key"]
key_v = tag_dict["Value"]
if key == "Name":
vm_name = key_v
if key == "timeout_secs":
timeout_secs = int(key_v)
if "Tags" in instance:
for tag_dict in instance["Tags"]:
key = tag_dict["Key"]
key_v = tag_dict["Value"]
if key == "Name":
vm_name = key_v
if key == "timeout_secs":
timeout_secs = int(key_v)
return vm_name, timeout_secs


Expand Down Expand Up @@ -94,7 +95,8 @@ def termination_check(
total_instances,
vm_name,
):
if ci_machines_prefix in vm_name:
# terminates also VMs without name set
if ci_machines_prefix in vm_name or vm_name == "":
total_instances = total_instances + 1
elapsed = current_datetime - launch_time
will_terminate = False
Expand Down
1 change: 1 addition & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ def test_extract_test_feasible_setups():
defaults_filename = "./tests/test_data/common-properties-v0.3.yml"
(
default_kpis,
_,
default_metrics,
exporter_timemetric_path,
default_specs,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_compare_command_logic():
assert total_improvements == 0
assert detected_regressions == []
# ensure that we have testcases date
assert "(1 datapoints)" in comment_body
assert "0.0%" in comment_body
assert (
"Detected a total of {} stable tests between versions".format(total_tests)
in comment_body
Expand Down
2 changes: 1 addition & 1 deletion tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_export_command_logic():
args = parser.parse_args(
args=[
"--benchmark-result-file",
"./tests/test_data/result_report_1690282709.835491.json",
"./tests/test_data/results/result_report_1690282709.835491.json",
"--exporter-spec-file",
"./tests/test_data/common-properties-enterprise.yml",
"--redistimeseries_host",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_generate_standalone_redis_server_args():
"--dir",
".",
"--loadmodule",
os.path.abspath(local_module_file),
local_module_file,
]

cmd = generate_standalone_redis_server_args(
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_generate_standalone_redis_server_args():
"--dir",
".",
"--loadmodule",
os.path.abspath(local_module_file),
local_module_file,
"CHUNK_SIZE_BYTES",
"128",
]
Expand Down

0 comments on commit c3eccd6

Please sign in to comment.