Enabling collapse for both JSON and JUnit output

Signed-off-by: Vishnu Challa <vchalla@vchalla-thinkpadp1gen2.rmtusnc.csb>
cloud-bulldozer · Feb 19, 2025 · a768a44 · a768a44
1 parent d05e3f4
commit a768a44
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 58 deletions.
diff --git a/examples/ols-load-generator.yaml b/examples/ols-load-generator.yaml
@@ -1,7 +1,7 @@
 tests :
   - name : ols-load-generator-{{ ols_test_workers }}w-5m
-    index: perf_scale_ci*
-    benchmarkIndex: ols-load-test-results*
+    index: {{ es_metadata_index }}
+    benchmarkIndex: {{ es_benchmark_index }}
     metadata:
       olsTestWorkers: {{ ols_test_workers }}
       olsTestDuration: "5m"
@@ -128,8 +128,8 @@ tests :
       direction: 1
 
   - name : ols-load-generator-{{ ols_test_workers }}w-10m
-    index: perf_scale_ci*
-    benchmarkIndex: ols-load-test-results*
+    index: {{ es_metadata_index }}
+    benchmarkIndex: {{ es_benchmark_index }}
     metadata:
       olsTestWorkers: {{ ols_test_workers }}
       olsTestDuration: "10m"
@@ -256,8 +256,8 @@ tests :
       direction: 1
 
   - name : ols-load-generator-{{ ols_test_workers }}w-20m
-    index: perf_scale_ci*
-    benchmarkIndex: ols-load-test-results*
+    index: {{ es_metadata_index }}
+    benchmarkIndex: {{ es_benchmark_index }}
     metadata:
       olsTestWorkers: {{ ols_test_workers }}
       olsTestDuration: "20m"

diff --git a/pkg/algorithms/algorithm.py b/pkg/algorithms/algorithm.py
@@ -41,6 +41,7 @@ def output_json(self) -> Tuple[str, str, bool]:
         _, change_points_by_metric = self._analyze()
         dataframe_json = self.dataframe.to_json(orient="records")
         dataframe_json = json.loads(dataframe_json)
+        collapsed_json = []
 
         for index, entry in enumerate(dataframe_json):
             entry["metrics"] = {
@@ -60,14 +61,32 @@ def output_json(self) -> Tuple[str, str, bool]:
                     percentage_change * self.metrics_config[key]["direction"] > 0
                     or self.metrics_config[key]["direction"] == 0
                 ):
-                    dataframe_json[index]["metrics"][key][
-                        "percentage_change"
-                    ] = percentage_change
+                    dataframe_json[index]["metrics"][key]["percentage_change"] = (
+                        percentage_change
+                    )
                     dataframe_json[index]["is_changepoint"] = True
+                    if self.options["collapse"]:
+                        if (
+                            index > 0
+                            and dataframe_json[index - 1] not in collapsed_json
+                        ):
+                            collapsed_json.append(dataframe_json[index - 1])
+                        if dataframe_json[index] not in collapsed_json:
+                            collapsed_json.append(dataframe_json[index])
+                        if (
+                            index < len(dataframe_json) - 1
+                            and dataframe_json[index + 1] not in collapsed_json
+                        ):
+                            collapsed_json.append(dataframe_json[index + 1])
+        return_json = collapsed_json if self.options["collapse"] else dataframe_json
+
+        return (
+            self.test["name"],
+            json.dumps(return_json, indent=2),
+            self.regression_flag,
+        )
 
-        return self.test["name"], json.dumps(dataframe_json, indent=2), self.regression_flag
-
-    def output_text(self) -> Tuple[str,str, bool]:
+    def output_text(self) -> Tuple[str, str, bool]:
         """Outputs the data in text/tabular format"""
         series, change_points_by_metric = self._analyze()
         change_points_by_time = self.group_change_points_by_time(
@@ -79,7 +98,7 @@ def output_text(self) -> Tuple[str,str, bool]:
         )
         return self.test["name"], output_table, self.regression_flag
 
-    def output_junit(self) -> Tuple[str,str, bool]:
+    def output_junit(self) -> Tuple[str, str, bool]:
         """Output junit format
 
         Returns:
@@ -91,7 +110,6 @@ def output_junit(self) -> Tuple[str,str, bool]:
             test_name=test_name,
             data_json=data_json,
             metrics_config=self.metrics_config,
-            options=self.options,
         )
         return test_name, data_junit, self.regression_flag
 
@@ -157,7 +175,7 @@ def setup_series(self) -> Series:
 
         return series
 
-    def output(self, output_format) -> Union[Any,None]:
+    def output(self, output_format) -> Union[Any, None]:
         """Method to select output method
 
         Args:

diff --git a/pkg/utils.py b/pkg/utils.py
@@ -20,8 +20,6 @@
 import pyshorteners
 
 
-
-
 # pylint: disable=too-many-locals
 def get_metric_data(
     uuids: List[str], index: str, metrics: Dict[str, Any], match: Matcher
@@ -52,9 +50,13 @@ def get_metric_data(
         logger_instance.info("Collecting %s", metric_name)
         try:
             if "agg" in metric:
-                metric_df, metric_dataframe_name = process_aggregation_metric(uuids, index, metric, match)
+                metric_df, metric_dataframe_name = process_aggregation_metric(
+                    uuids, index, metric, match
+                )
             else:
-                metric_df, metric_dataframe_name = process_standard_metric(uuids, index, metric, match, metric_value_field)
+                metric_df, metric_dataframe_name = process_standard_metric(
+                    uuids, index, metric, match, metric_value_field
+                )
 
             metric["labels"] = labels
             metric["direction"] = direction
@@ -69,6 +71,7 @@ def get_metric_data(
             )
     return dataframe_list, metrics_config
 
+
 def process_aggregation_metric(
     uuids: List[str], index: str, metric: Dict[str, Any], match: Matcher
 ) -> pd.DataFrame:
@@ -87,13 +90,24 @@ def process_aggregation_metric(
     aggregation_value = metric["agg"]["value"]
     aggregation_type = metric["agg"]["agg_type"]
     aggregation_name = f"{aggregation_value}_{aggregation_type}"
-    aggregated_df = match.convert_to_df(aggregated_metric_data, columns=["uuid", "timestamp", aggregation_name])
+    aggregated_df = match.convert_to_df(
+        aggregated_metric_data, columns=["uuid", "timestamp", aggregation_name]
+    )
     aggregated_df = aggregated_df.drop_duplicates(subset=["uuid"], keep="first")
     aggregated_metric_name = f"{metric['name']}_{aggregation_type}"
-    aggregated_df = aggregated_df.rename(columns={aggregation_name: aggregated_metric_name})
+    aggregated_df = aggregated_df.rename(
+        columns={aggregation_name: aggregated_metric_name}
+    )
     return aggregated_df, aggregated_metric_name
 
-def process_standard_metric(uuids: List[str], index: str, metric: Dict[str, Any], match: Matcher, metric_value_field: str) -> pd.DataFrame:
+
+def process_standard_metric(
+    uuids: List[str],
+    index: str,
+    metric: Dict[str, Any],
+    match: Matcher,
+    metric_value_field: str,
+) -> pd.DataFrame:
     """Method to get dataframe of standard metric
 
     Args:
@@ -106,13 +120,18 @@ def process_standard_metric(uuids: List[str], index: str, metric: Dict[str, Any]
     Returns:
         pd.DataFrame: _description_
     """
-    standard_metric_data = match.getResults("",uuids, index, metric)
-    standard_metric_df = match.convert_to_df(standard_metric_data, columns=["uuid", "timestamp", metric_value_field])
+    standard_metric_data = match.getResults("", uuids, index, metric)
+    standard_metric_df = match.convert_to_df(
+        standard_metric_data, columns=["uuid", "timestamp", metric_value_field]
+    )
     standard_metric_name = f"{metric['name']}_{metric_value_field}"
-    standard_metric_df = standard_metric_df.rename(columns={metric_value_field: standard_metric_name})
+    standard_metric_df = standard_metric_df.rename(
+        columns={metric_value_field: standard_metric_name}
+    )
     standard_metric_df = standard_metric_df.drop_duplicates()
     return standard_metric_df, standard_metric_name
 
+
 def extract_metadata_from_test(test: Dict[str, Any]) -> Dict[Any, Any]:
     """Gets metadata of the run from each test
 
@@ -129,9 +148,6 @@ def extract_metadata_from_test(test: Dict[str, Any]) -> Dict[Any, Any]:
     return metadata
 
 
-
-
-
 def get_datasource(data: Dict[Any, Any]) -> str:
     """Gets es url from config or env
 
@@ -157,7 +173,7 @@ def filter_uuids_on_index(
     uuids: List[str],
     match: Matcher,
     baseline: str,
-    filter_node_count: bool
+    filter_node_count: bool,
 ) -> List[str]:
     """returns the index to be used and runs as uuids
 
@@ -219,9 +235,18 @@ def process_test(
     fingerprint_index = test["index"]
 
     # getting metadata
-    metadata = extract_metadata_from_test(test) if options["uuid"] in ("", None) else get_metadata_with_uuid(options["uuid"], match)
+    metadata = (
+        extract_metadata_from_test(test)
+        if options["uuid"] in ("", None)
+        else get_metadata_with_uuid(options["uuid"], match)
+    )
     # get uuids, buildUrls matching with the metadata
-    runs = match.get_uuid_by_metadata(metadata, fingerprint_index, lookback_date=start_timestamp, lookback_size=options['lookback_size'])
+    runs = match.get_uuid_by_metadata(
+        metadata,
+        fingerprint_index,
+        lookback_date=start_timestamp,
+        lookback_size=options["lookback_size"],
+    )
     uuids = [run["uuid"] for run in runs]
     buildUrls = {run["uuid"]: run["buildUrl"] for run in runs}
     # get uuids if there is a baseline
@@ -236,7 +261,12 @@ def process_test(
     benchmark_index = test["benchmarkIndex"]
 
     uuids = filter_uuids_on_index(
-        metadata, benchmark_index, uuids, match, options["baseline"], options['node_count']
+        metadata,
+        benchmark_index,
+        uuids,
+        match,
+        options["baseline"],
+        options["node_count"],
     )
     # get metrics data and dataframe
     metrics = test["metrics"]
@@ -262,15 +292,15 @@ def process_test(
             if options["convert_tinyurl"]
             else buildUrls[uuid]
         )
-
         # pylint: disable = cell-var-from-loop
     )
-    merged_df=merged_df.reset_index(drop=True)
-    #save the dataframe
+    merged_df = merged_df.reset_index(drop=True)
+    # save the dataframe
     output_file_path = f"{options['save_data_path'].split('.')[0]}-{test['name']}.csv"
     match.save_results(merged_df, csv_file_path=output_file_path)
     return merged_df, metrics_config
 
+
 def shorten_url(shortener: any, uuids: str) -> str:
     """Shorten url if there is a list of buildUrls
 
@@ -284,9 +314,10 @@ def shorten_url(shortener: any, uuids: str) -> str:
     short_url_list = []
     for buildUrl in uuids.split(","):
         short_url_list.append(shortener.tinyurl.short(buildUrl))
-    short_url = ','.join(short_url_list)
+    short_url = ",".join(short_url_list)
     return short_url
 
+
 def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:
     """Gets metadata of the run from each test
 
@@ -333,10 +364,7 @@ def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:
 
 
 def json_to_junit(
-    test_name: str,
-    data_json: Dict[Any, Any],
-    metrics_config: Dict[Any, Any],
-    options: Dict[Any, Any],
+    test_name: str, data_json: Dict[Any, Any], metrics_config: Dict[Any, Any]
 ) -> str:
     """Convert json to junit format
 
@@ -371,11 +399,7 @@ def json_to_junit(
             failures_count += 1
             failure = ET.SubElement(testcase, "failure")
             failure.text = (
-                "\n"
-                + generate_tabular_output(
-                    data_json, metric_name=metric, collapse=options["collapse"]
-                )
-                + "\n"
+                "\n" + generate_tabular_output(data_json, metric_name=metric) + "\n"
             )
 
     testsuite.set("failures", str(failures_count))
@@ -386,7 +410,7 @@ def json_to_junit(
     return pretty_xml_as_string
 
 
-def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str:
+def generate_tabular_output(data: list, metric_name: str) -> str:
     """converts json to tabular format
 
     Args:
@@ -406,16 +430,8 @@ def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str
         "is_changepoint": bool(record["metrics"][metric_name]["percentage_change"]),
         "percentage_change": record["metrics"][metric_name]["percentage_change"],
     }
-    if collapse:
-        for i in range(1, len(data)):
-            if data[i]["metrics"][metric_name]["percentage_change"] != 0:
-                records.append(create_record(data[i - 1]))
-                records.append(create_record(data[i]))
-                if i + 1 < len(data):
-                    records.append(create_record(data[i + 1]))
-    else:
-        for i in range(0, len(data)):
-            records.append(create_record(data[i]))
+    for i in range(0, len(data)):
+        records.append(create_record(data[i]))
 
     df = pd.DataFrame(records).drop_duplicates().reset_index(drop=True)
     table = tabulate(df, headers="keys", tablefmt="psql")
@@ -424,7 +440,9 @@ def generate_tabular_output(data: list, metric_name: str, collapse: bool) -> str
     if lines:
         highlighted_lines += lines[0:3]
     for i, line in enumerate(lines[3:-1]):
-        if df["percentage_change"][i]:  # Offset by 3 to account for header and separator
+        if df["percentage_change"][
+            i
+        ]:  # Offset by 3 to account for header and separator
             highlighted_line = f"{lines[i+3]} -- changepoint"
             highlighted_lines.append(highlighted_line)
         else:

diff --git a/test.bats b/test.bats
@@ -118,7 +118,7 @@ setup() {
 
 @test "orion cmd ols configuration test " {
   export ols_test_workers=10
-  run_cmd orion cmd --config "examples/ols-load-generator.yaml" --hunter-analyze
+  run_cmd es_metadata_index="perf_scale_ci*" es_benchmark_index="ols-load-test-results*" orion cmd --config "examples/ols-load-generator.yaml" --hunter-analyze
 }
 
 @test "orion daemon small scale cluster density with anomaly detection " {