Skip to content

Commit

Permalink
Updated watchdog to avoid crash on missing Tags (#423)
Browse files Browse the repository at this point in the history
* Added support for spot instance setups

* Fixed CI tests based upon latest changes of tools

* Updating spot/full-price counter

* Further improved the comparison string generated between baseline/comparison data

* Updated watchdog to avoid crash on missing Tags
  • Loading branch information
filipecosta90 authored Jan 31, 2024
1 parent 7405674 commit c3eccd6
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ jobs:
pytest:
strategy:
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10' ]
python-version: [ '3.9', '3.10' ]
fail-fast: false
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
USING_COVERAGE: "3.10"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "redisbench-admin"
version = "0.10.17"
version = "0.10.19"
description = "Redis benchmark run helper. A wrapper around Redis and Redis Modules benchmark tools ( ftsb_redisearch, memtier_benchmark, redis-benchmark, aibench, etc... )."
authors = ["filipecosta90 <filipecosta.90@gmail.com>","Redis Performance Group <performance@redis.com>"]
readme = "README.md"
Expand Down
9 changes: 7 additions & 2 deletions redisbench_admin/compare/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def get_by_strings(
baseline_str = baseline_branch
if comparison_branch is not None:
comparison_covered = True
by_str_baseline = "branch"
by_str_comparison = "branch"
comparison_str = comparison_branch

if baseline_tag is not None:
Expand Down Expand Up @@ -960,7 +960,12 @@ def check_multi_value_filter(baseline_str):


def prepare_value_str(baseline_pct_change, baseline_v, baseline_values, simplify_table):
baseline_v_str = " {:.0f}".format(baseline_v)
if baseline_v < 1.0:
baseline_v_str = " {:.2f}".format(baseline_v)
elif baseline_v < 10.0:
baseline_v_str = " {:.1f}".format(baseline_v)
else:
baseline_v_str = " {:.0f}".format(baseline_v)
stamp_b = ""
if baseline_pct_change > 10.0:
stamp_b = "UNSTABLE "
Expand Down
8 changes: 7 additions & 1 deletion redisbench_admin/run_remote/remote_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def remote_env_setup(
tf_override_name=None,
tf_folder_path=None,
spot_instance_error=False,
spot_price_counter=0,
full_price_counter=0,
):
server_plaintext_port = args.db_port
db_ssh_port = args.db_ssh_port
Expand Down Expand Up @@ -85,6 +87,7 @@ def remote_env_setup(
tf_folder_spot_path,
)
spot_available_and_used = True
spot_price_counter = spot_price_counter + 1
except TerraformCommandError as error:
spot_instance_error = True
logging.error(
Expand All @@ -95,7 +98,7 @@ def remote_env_setup(
pass
else:
logging.warning(
f"Even though there is a spot instance config, avoiding deploying it."
"Even though there is a spot instance config, avoiding deploying it."
)
if spot_available_and_used is False:
(
Expand All @@ -121,6 +124,7 @@ def remote_env_setup(
tf_override_name,
tf_folder_path,
)
full_price_counter = full_price_counter + 1
logging.info("Using the following connection addresses.")
logging.info(f"client_public_ip={client_public_ip}")
logging.info(f"server_public_ip={server_public_ip}")
Expand All @@ -134,4 +138,6 @@ def remote_env_setup(
client_ssh_port,
username,
spot_instance_error,
spot_price_counter,
full_price_counter,
)
8 changes: 6 additions & 2 deletions redisbench_admin/run_remote/remote_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,12 @@ def post_process_remote_run(
start_time_str,
stdout,
)
with open(local_benchmark_output_filename, "r") as json_file:
results_dict = json.load(json_file)
try:
with open(local_benchmark_output_filename, "r") as json_file:
results_dict = json.load(json_file)
except json.decoder.JSONDecodeError as e:
logging.error("Received error while decoding JSON: {}".format(e.__str__()))
pass
# check KPIs
return_code = results_dict_kpi_check(benchmark_config, results_dict, return_code)
# if the benchmark tool is redisgraph-benchmark-go and
Expand Down
31 changes: 29 additions & 2 deletions redisbench_admin/run_remote/run_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def run_remote_command_logic(args, project_name, project_version):

# Used to only deploy spot once per run
spot_instance_error = False
ts_key_spot_price = f"ts:{tf_triggering_env}:tests:spot_price"
ts_key_full_price = f"ts:{tf_triggering_env}:tests:full_price"

for benchmark_type, bench_by_dataset_map in benchmark_runs_plan.items():
if return_code != 0 and args.fail_fast:
Expand Down Expand Up @@ -418,6 +420,8 @@ def run_remote_command_logic(args, project_name, project_version):
client_ssh_port,
username,
spot_instance_error,
spot_price_counter,
full_price_counter,
) = remote_env_setup(
args,
benchmark_config,
Expand All @@ -435,16 +439,37 @@ def run_remote_command_logic(args, project_name, project_version):
TF_OVERRIDE_NAME,
TF_OVERRIDE_REMOTE,
spot_instance_error,
0,
0,
)

# after we've created the env, even on error we should always teardown
# in case of some unexpected error we fail the test
try:
(
_,
_,
start_time_setup_ms,
testcase_start_time_str,
) = get_start_time_vars()
if args.push_results_redistimeseries:
logging.info(
f"Updating overall spot price tests counter {ts_key_spot_price}"
)
rts.ts().add(
ts_key_spot_price,
start_time_setup_ms,
spot_price_counter,
duplicate_policy="sum",
)
logging.info(
f"Updating overall spot price full counter {ts_key_spot_price}"
)
rts.ts().add(
ts_key_full_price,
start_time_setup_ms,
full_price_counter,
duplicate_policy="sum",
)
logname = "{}_{}.log".format(
test_name, testcase_start_time_str
)
Expand Down Expand Up @@ -871,7 +896,9 @@ def run_remote_command_logic(args, project_name, project_version):
)
return_code |= 1
raise Exception(
"Failed to run remote benchmark."
"Failed to run remote benchmark. {}".format(
e.__str__()
)
)

if setup_details["env"] is None:
Expand Down
1 change: 1 addition & 0 deletions redisbench_admin/utils/benchmark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def parse_exporter_timemetric(metric_path: str, results_dict: dict):
logging.error(
"Unable to parse time-metric {}. Error: {}".format(metric_path, e.__str__())
)
pass
return datapoints_timestamp


Expand Down
2 changes: 0 additions & 2 deletions redisbench_admin/utils/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,8 +578,6 @@ def fetch_remote_setup_from_config(
setup_type = remote_setup_property["type"]
if "setup" in remote_setup_property:
setup = remote_setup_property["setup"]
if "spot_instance" in remote_setup_property:
spot_path = "/terraform/" + remote_setup_property["spot_instance"]
# fetch terraform folder
path = "/terraform/{}-{}".format(setup_type, setup)
terraform_working_dir = common_tf(branch, path, repo)
Expand Down
18 changes: 10 additions & 8 deletions redisbench_admin/watchdog/watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,14 @@ def get_ci_ec2_instances_by_state(ec2_client, ci_machines_prefix, requested_stat
def get_vname_timeout_secs(instance):
vm_name = ""
timeout_secs = None
for tag_dict in instance["Tags"]:
key = tag_dict["Key"]
key_v = tag_dict["Value"]
if key == "Name":
vm_name = key_v
if key == "timeout_secs":
timeout_secs = int(key_v)
if "Tags" in instance:
for tag_dict in instance["Tags"]:
key = tag_dict["Key"]
key_v = tag_dict["Value"]
if key == "Name":
vm_name = key_v
if key == "timeout_secs":
timeout_secs = int(key_v)
return vm_name, timeout_secs


Expand Down Expand Up @@ -94,7 +95,8 @@ def termination_check(
total_instances,
vm_name,
):
if ci_machines_prefix in vm_name:
# terminates also VMs without name set
if ci_machines_prefix in vm_name or vm_name == "":
total_instances = total_instances + 1
elapsed = current_datetime - launch_time
will_terminate = False
Expand Down
1 change: 1 addition & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ def test_extract_test_feasible_setups():
defaults_filename = "./tests/test_data/common-properties-v0.3.yml"
(
default_kpis,
_,
default_metrics,
exporter_timemetric_path,
default_specs,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_compare_command_logic():
assert total_improvements == 0
assert detected_regressions == []
# ensure that we have testcases date
assert "(1 datapoints)" in comment_body
assert "0.0%" in comment_body
assert (
"Detected a total of {} stable tests between versions".format(total_tests)
in comment_body
Expand Down
2 changes: 1 addition & 1 deletion tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_export_command_logic():
args = parser.parse_args(
args=[
"--benchmark-result-file",
"./tests/test_data/result_report_1690282709.835491.json",
"./tests/test_data/results/result_report_1690282709.835491.json",
"--exporter-spec-file",
"./tests/test_data/common-properties-enterprise.yml",
"--redistimeseries_host",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_generate_standalone_redis_server_args():
"--dir",
".",
"--loadmodule",
os.path.abspath(local_module_file),
local_module_file,
]

cmd = generate_standalone_redis_server_args(
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_generate_standalone_redis_server_args():
"--dir",
".",
"--loadmodule",
os.path.abspath(local_module_file),
local_module_file,
"CHUNK_SIZE_BYTES",
"128",
]
Expand Down

0 comments on commit c3eccd6

Please sign in to comment.