Skip to content

Commit

Permalink
Enabling collapse for both JSON and JUNIT
Browse files (browse the repository at this point in the history)
Signed-off-by: Vishnu Challa <vchalla@vchalla-thinkpadp1gen2.rmtusnc.csb>
  • Loading branch information
Vishnu Challa committed Feb 19, 2025
1 parent d05e3f4 commit a768a44
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 58 deletions.
12 changes: 6 additions & 6 deletions examples/ols-load-generator.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
tests :
- name : ols-load-generator-{{ ols_test_workers }}w-5m
index: perf_scale_ci*
benchmarkIndex: ols-load-test-results*
index: {{ es_metadata_index }}
benchmarkIndex: {{ es_benchmark_index }}
metadata:
olsTestWorkers: {{ ols_test_workers }}
olsTestDuration: "5m"
Expand Down Expand Up @@ -128,8 +128,8 @@ tests :
direction: 1

- name : ols-load-generator-{{ ols_test_workers }}w-10m
index: perf_scale_ci*
benchmarkIndex: ols-load-test-results*
index: {{ es_metadata_index }}
benchmarkIndex: {{ es_benchmark_index }}
metadata:
olsTestWorkers: {{ ols_test_workers }}
olsTestDuration: "10m"
Expand Down Expand Up @@ -256,8 +256,8 @@ tests :
direction: 1

- name : ols-load-generator-{{ ols_test_workers }}w-20m
index: perf_scale_ci*
benchmarkIndex: ols-load-test-results*
index: {{ es_metadata_index }}
benchmarkIndex: {{ es_benchmark_index }}
metadata:
olsTestWorkers: {{ ols_test_workers }}
olsTestDuration: "20m"
Expand Down
36 changes: 27 additions & 9 deletions pkg/algorithms/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def output_json(self) -> Tuple[str, str, bool]:
_, change_points_by_metric = self._analyze()
dataframe_json = self.dataframe.to_json(orient="records")
dataframe_json = json.loads(dataframe_json)
collapsed_json = []

for index, entry in enumerate(dataframe_json):
entry["metrics"] = {
Expand All @@ -60,14 +61,32 @@ def output_json(self) -> Tuple[str, str, bool]:
percentage_change * self.metrics_config[key]["direction"] > 0
or self.metrics_config[key]["direction"] == 0
):
dataframe_json[index]["metrics"][key][
"percentage_change"
] = percentage_change
dataframe_json[index]["metrics"][key]["percentage_change"] = (
percentage_change
)
dataframe_json[index]["is_changepoint"] = True
if self.options["collapse"]:
if (
index > 0
and dataframe_json[index - 1] not in collapsed_json
):
collapsed_json.append(dataframe_json[index - 1])
if dataframe_json[index] not in collapsed_json:
collapsed_json.append(dataframe_json[index])
if (
index < len(dataframe_json) - 1
and dataframe_json[index + 1] not in collapsed_json
):
collapsed_json.append(dataframe_json[index + 1])
return_json = collapsed_json if self.options["collapse"] else dataframe_json

return (
self.test["name"],
json.dumps(return_json, indent=2),
self.regression_flag,
)

return self.test["name"], json.dumps(dataframe_json, indent=2), self.regression_flag

def output_text(self) -> Tuple[str,str, bool]:
def output_text(self) -> Tuple[str, str, bool]:
"""Outputs the data in text/tabular format"""
series, change_points_by_metric = self._analyze()
change_points_by_time = self.group_change_points_by_time(
Expand All @@ -79,7 +98,7 @@ def output_text(self) -> Tuple[str,str, bool]:
)
return self.test["name"], output_table, self.regression_flag

def output_junit(self) -> Tuple[str,str, bool]:
def output_junit(self) -> Tuple[str, str, bool]:
"""Output junit format
Returns:
Expand All @@ -91,7 +110,6 @@ def output_junit(self) -> Tuple[str,str, bool]:
test_name=test_name,
data_json=data_json,
metrics_config=self.metrics_config,
options=self.options,
)
return test_name, data_junit, self.regression_flag

Expand Down Expand Up @@ -157,7 +175,7 @@ def setup_series(self) -> Series:

return series

def output(self, output_format) -> Union[Any,None]:
def output(self, output_format) -> Union[Any, None]:
"""Method to select output method
Args:
Expand Down
102 changes: 60 additions & 42 deletions pkg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
import pyshorteners




# pylint: disable=too-many-locals
def get_metric_data(
uuids: List[str], index: str, metrics: Dict[str, Any], match: Matcher
Expand Down Expand Up @@ -52,9 +50,13 @@ def get_metric_data(
logger_instance.info("Collecting %s", metric_name)
try:
if "agg" in metric:
metric_df, metric_dataframe_name = process_aggregation_metric(uuids, index, metric, match)
metric_df, metric_dataframe_name = process_aggregation_metric(
uuids, index, metric, match
)
else:
metric_df, metric_dataframe_name = process_standard_metric(uuids, index, metric, match, metric_value_field)
metric_df, metric_dataframe_name = process_standard_metric(
uuids, index, metric, match, metric_value_field
)

metric["labels"] = labels
metric["direction"] = direction
Expand All @@ -69,6 +71,7 @@ def get_metric_data(
)
return dataframe_list, metrics_config


def process_aggregation_metric(
uuids: List[str], index: str, metric: Dict[str, Any], match: Matcher
) -> pd.DataFrame:
Expand All @@ -87,13 +90,24 @@ def process_aggregation_metric(
aggregation_value = metric["agg"]["value"]
aggregation_type = metric["agg"]["agg_type"]
aggregation_name = f"{aggregation_value}_{aggregation_type}"
aggregated_df = match.convert_to_df(aggregated_metric_data, columns=["uuid", "timestamp", aggregation_name])
aggregated_df = match.convert_to_df(
aggregated_metric_data, columns=["uuid", "timestamp", aggregation_name]
)
aggregated_df = aggregated_df.drop_duplicates(subset=["uuid"], keep="first")
aggregated_metric_name = f"{metric['name']}_{aggregation_type}"
aggregated_df = aggregated_df.rename(columns={aggregation_name: aggregated_metric_name})
aggregated_df = aggregated_df.rename(
columns={aggregation_name: aggregated_metric_name}
)
return aggregated_df, aggregated_metric_name

def process_standard_metric(uuids: List[str], index: str, metric: Dict[str, Any], match: Matcher, metric_value_field: str) -> pd.DataFrame:

def process_standard_metric(
uuids: List[str],
index: str,
metric: Dict[str, Any],
match: Matcher,
metric_value_field: str,
) -> pd.DataFrame:
"""Method to get dataframe of standard metric
Args:
Expand All @@ -106,13 +120,18 @@ def process_standard_metric(uuids: List[str], index: str, metric: Dict[str, Any]
Returns:
pd.DataFrame: _description_
"""
standard_metric_data = match.getResults("",uuids, index, metric)
standard_metric_df = match.convert_to_df(standard_metric_data, columns=["uuid", "timestamp", metric_value_field])
standard_metric_data = match.getResults("", uuids, index, metric)
standard_metric_df = match.convert_to_df(
standard_metric_data, columns=["uuid", "timestamp", metric_value_field]
)
standard_metric_name = f"{metric['name']}_{metric_value_field}"
standard_metric_df = standard_metric_df.rename(columns={metric_value_field: standard_metric_name})
standard_metric_df = standard_metric_df.rename(
columns={metric_value_field: standard_metric_name}
)
standard_metric_df = standard_metric_df.drop_duplicates()
return standard_metric_df, standard_metric_name


def extract_metadata_from_test(test: Dict[str, Any]) -> Dict[Any, Any]:
"""Gets metadata of the run from each test
Expand All @@ -129,9 +148,6 @@ def extract_metadata_from_test(test: Dict[str, Any]) -> Dict[Any, Any]:
return metadata





def get_datasource(data: Dict[Any, Any]) -> str:
"""Gets es url from config or env
Expand All @@ -157,7 +173,7 @@ def filter_uuids_on_index(
uuids: List[str],
match: Matcher,
baseline: str,
filter_node_count: bool
filter_node_count: bool,
) -> List[str]:
"""returns the index to be used and runs as uuids
Expand Down Expand Up @@ -219,9 +235,18 @@ def process_test(
fingerprint_index = test["index"]

# getting metadata
metadata = extract_metadata_from_test(test) if options["uuid"] in ("", None) else get_metadata_with_uuid(options["uuid"], match)
metadata = (
extract_metadata_from_test(test)
if options["uuid"] in ("", None)
else get_metadata_with_uuid(options["uuid"], match)
)
# get uuids, buildUrls matching with the metadata
runs = match.get_uuid_by_metadata(metadata, fingerprint_index, lookback_date=start_timestamp, lookback_size=options['lookback_size'])
runs = match.get_uuid_by_metadata(
metadata,
fingerprint_index,
lookback_date=start_timestamp,
lookback_size=options["lookback_size"],
)
uuids = [run["uuid"] for run in runs]
buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
# get uuids if there is a baseline
Expand All @@ -236,7 +261,12 @@ def process_test(
benchmark_index = test["benchmarkIndex"]

uuids = filter_uuids_on_index(
metadata, benchmark_index, uuids, match, options["baseline"], options['node_count']
metadata,
benchmark_index,
uuids,
match,
options["baseline"],
options["node_count"],
)
# get metrics data and dataframe
metrics = test["metrics"]
Expand All @@ -262,15 +292,15 @@ def process_test(
if options["convert_tinyurl"]
else buildUrls[uuid]
)

# pylint: disable = cell-var-from-loop
)
merged_df=merged_df.reset_index(drop=True)
#save the dataframe
merged_df = merged_df.reset_index(drop=True)
# save the dataframe
output_file_path = f"{options['save_data_path'].split('.')[0]}-{test['name']}.csv"
match.save_results(merged_df, csv_file_path=output_file_path)
return merged_df, metrics_config


def shorten_url(shortener: any, uuids: str) -> str:
"""Shorten url if there is a list of buildUrls
Expand All @@ -284,9 +314,10 @@ def shorten_url(shortener: any, uuids: str) -> str:
short_url_list = []
for buildUrl in uuids.split(","):
short_url_list.append(shortener.tinyurl.short(buildUrl))
short_url = ','.join(short_url_list)
short_url = ",".join(short_url_list)
return short_url


def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:
"""Gets metadata of the run from each test
Expand Down Expand Up @@ -333,10 +364,7 @@ def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:


def json_to_junit(
test_name: str,
data_json: Dict[Any, Any],
metrics_config: Dict[Any, Any],
options: Dict[Any, Any],
test_name: str, data_json: Dict[Any, Any], metrics_config: Dict[Any, Any]
) -> str:
"""Convert json to junit format
Expand Down Expand Up @@ -371,11 +399,7 @@ def json_to_junit(
failures_count += 1
failure = ET.SubElement(testcase, "failure")
failure.text = (
"\n"
+ generate_tabular_output(
data_json, metric_name=metric, collapse=options["collapse"]
)
+ "\n"
"\n" + generate_tabular_output(data_json, metric_name=metric) + "\n"
)

testsuite.set("failures", str(failures_count))
Expand All @@ -386,7 +410,7 @@ def json_to_junit(
return pretty_xml_as_string


def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str:
def generate_tabular_output(data: list, metric_name: str) -> str:
"""converts json to tabular format
Args:
Expand All @@ -406,16 +430,8 @@ def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str
"is_changepoint": bool(record["metrics"][metric_name]["percentage_change"]),
"percentage_change": record["metrics"][metric_name]["percentage_change"],
}
if collapse:
for i in range(1, len(data)):
if data[i]["metrics"][metric_name]["percentage_change"] != 0:
records.append(create_record(data[i - 1]))
records.append(create_record(data[i]))
if i + 1 < len(data):
records.append(create_record(data[i + 1]))
else:
for i in range(0, len(data)):
records.append(create_record(data[i]))
for i in range(0, len(data)):
records.append(create_record(data[i]))

df = pd.DataFrame(records).drop_duplicates().reset_index(drop=True)
table = tabulate(df, headers="keys", tablefmt="psql")
Expand All @@ -424,7 +440,9 @@ def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str
if lines:
highlighted_lines += lines[0:3]
for i, line in enumerate(lines[3:-1]):
if df["percentage_change"][i]: # Offset by 3 to account for header and separator
if df["percentage_change"][
i
]: # Offset by 3 to account for header and separator
highlighted_line = f"{lines[i+3]} -- changepoint"
highlighted_lines.append(highlighted_line)
else:
Expand Down
2 changes: 1 addition & 1 deletion test.bats
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ setup() {

@test "orion cmd ols configuration test " {
export ols_test_workers=10
run_cmd orion cmd --config "examples/ols-load-generator.yaml" --hunter-analyze
run_cmd es_metadata_index="perf_scale_ci*" es_benchmark_index="ols-load-test-results*" orion cmd --config "examples/ols-load-generator.yaml" --hunter-analyze
}

@test "orion daemon small scale cluster density with anomaly detection " {
Expand Down

0 comments on commit a768a44

Please sign in to comment.