Skip to content

Commit 494a2f9

Browse files
paigerube14Auto User
authored andcommitted
geting percent difference
rh-pre-commit.version: 2.2.0 rh-pre-commit.check-secrets: ENABLED
1 parent 91f953a commit 494a2f9

File tree

10 files changed

+172
-244
lines changed

10 files changed

+172
-244
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,16 @@ Additionally, users can specify a custom path for the output CSV file using the
123123

124124
Orion now supports anomaly detection for your data. Use the ```--anomaly-detection``` command to start the anomaly detection process.
125125

126+
127+
To find significant percent differences between workload runs, use the ```--cmr``` option. This compares the most recent run with any previous matching runs or baseline UUIDs. If more than one previous run is found, their values are averaged together and that average is then compared with the most recent run. Use it with *direction: 0* (set in the config) and the ```-o json``` output format to see the percent differences.
128+
129+
![cmr percent difference](percentdiff.jpg)
130+
126131
You can now constrain your look-back period using the ```--lookback``` option. The format for look-back is ```XdYh```, where X represents the number of days and Y represents the number of hours.
127132

128133
You can open the match requirement by using the ```--node-count``` option to find any matching uuid based on the metadata and not have to have the same jobConfig.jobIterations. This variable is a ```True``` or ```False```, defaulted to False.
129134

130-
**_NOTE:_** The ```--hunter-analyze``` and ```--anomaly-detection``` flags are mutually exclusive. They cannot be used together because they represent different algorithms designed for distinct use cases.
135+
**_NOTE:_** The ```--cmr```, ```--hunter-analyze``` and ```--anomaly-detection``` flags are mutually exclusive. They cannot be used together because they represent different algorithms designed for distinct use cases.
131136

132137
### Daemon mode
133138
The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a test name of predefined tests, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode just use the following commands

orion.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
6969

7070
# pylint: disable=too-many-locals
7171
@cli.command(name="cmd")
72+
@click.option(
73+
"--cmr",
74+
is_flag=True,
75+
help="Generate percent difference in comparison",
76+
cls=MutuallyExclusiveOption,
77+
mutually_exclusive=["anomaly_detection","hunter_analyze"],
78+
)
79+
@click.option("--filter", is_flag=True, help="Generate percent difference in comparison")
7280
@click.option("--config", default="config.yaml", help="Path to the configuration file")
7381
@click.option(
7482
"--save-data-path", default="data.csv", help="Path to save the output file"
@@ -79,7 +87,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
7987
is_flag=True,
8088
help="run hunter analyze",
8189
cls=MutuallyExclusiveOption,
82-
mutually_exclusive=["anomaly_detection"],
90+
mutually_exclusive=["anomaly_detection","cmr"],
8391
)
8492
@click.option("--anomaly-window", type=int, callback=validate_anomaly_options, help="set window size for moving average for anomaly-detection")
8593
@click.option("--min-anomaly-percent", type=int, callback=validate_anomaly_options, help="set minimum percentage difference from moving average for data point to be detected as anomaly")
@@ -88,7 +96,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
8896
is_flag=True,
8997
help="run anomaly detection algorithm powered by isolation forest",
9098
cls=MutuallyExclusiveOption,
91-
mutually_exclusive=["hunter_analyze"],
99+
mutually_exclusive=["hunter_analyze","cmr"],
92100
)
93101
@click.option(
94102
"-o",

percentdiff.jpg

27.5 KB
Loading

pkg/algorithms/algorithmFactory.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pkg.constants as cnsts
77
from .edivisive import EDivisive
88
from .isolationforest import IsolationForestWeightedMean
9+
from .cmr import CMR
910

1011

1112
class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments, line-too-long
@@ -30,4 +31,6 @@ def instantiate_algorithm(self, algorithm: str, matcher: Matcher, dataframe:pd.D
3031
return EDivisive(matcher, dataframe, test, options, metrics_config)
3132
if algorithm == cnsts.ISOLATION_FOREST:
3233
return IsolationForestWeightedMean(matcher, dataframe, test, options, metrics_config)
34+
if algorithm == cnsts.CMR:
35+
return CMR(matcher, dataframe, test, options, metrics_config)
3336
raise ValueError("Invalid algorithm called")

pkg/algorithms/cmr/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
Init for CMR Algorithm
3+
"""
4+
from .cmr import CMR

pkg/algorithms/cmr/cmr.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""CMR - Comparing Mean Responses Algorithm"""
2+
3+
# pylint: disable = line-too-long
4+
from typing import List
5+
import pandas as pd
6+
import numpy
7+
8+
from fmatch.logrus import SingletonLogger
9+
from hunter.series import ChangePoint, ComparativeStats
10+
from pkg.algorithms.algorithm import Algorithm
11+
12+
13+
class CMR(Algorithm):
    """Implementation of the CMR (Comparing Mean Responses) algorithm

    Collapses every previous run into a single averaged row, pairs it with the
    most recent run and emits one change point per configured metric holding
    both values, so the percent difference can be derived from them.

    Args:
        Algorithm (Algorithm): Inherits
    """

    def _analyze(self):
        """Average the previous runs and compare them with the most recent run

        Returns:
            series: data series that contains attributes and full dataframe
            change_points_by_metric: dict of metric name to list of ChangePoints
                (empty dict when there are fewer than two runs to compare)
        """
        logger_instance = SingletonLogger.getLogger("Orion")
        logger_instance.info("Starting analysis using CMR")
        # Normalise timestamps to integer epoch seconds
        self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
        self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9

        # With fewer than two runs there is nothing to compare against;
        # this also keeps an empty dataframe out of the averaging step.
        if len(self.dataframe.index) < 2:
            series = self.setup_series()
            series.data = self.dataframe
            return series, {}

        # Reduce the data to two rows: mean of rows 0..-2, then the latest run
        self.dataframe = self.combine_and_average_runs(self.dataframe)

        series = self.setup_series()
        tolerancy = 20

        df, change_points_by_metric = self.run_cmr(tolerancy, self.dataframe)
        series.data = df
        return series, change_points_by_metric

    def run_cmr(self, tolerancy: int, dataframe_list: pd.DataFrame):  # pylint: disable=unused-argument
        """
        Generate a change point for every metric of a 2 row dataframe

        Row 0 is expected to hold the averaged previous runs and row 1 the most
        recent run; both values are stored in the change point stats.

        Args:
            tolerancy (int): tolerancy to compare on. Currently not applied:
                every metric gets a change point and no pass/fail is derived here
            dataframe_list (pd.DataFrame): data frame of all data to compare on

        Returns:
            pd.Dataframe, dict[metric_name, List[ChangePoint]]: Returned data frame and change points
        """
        metric_columns = self.metrics_config.keys()
        change_points_by_metric = {k: [] for k in metric_columns}
        # Every change point shares the same far-future time value.
        # warn=False silences the "discarding nanoseconds" warning; the value is unchanged.
        max_date_time = pd.Timestamp.max.to_pydatetime(warn=False)
        max_time = max_date_time.timestamp()
        for column in metric_columns:
            values = dataframe_list[column]
            change_point = ChangePoint(
                metric=column,
                index=1,
                time=max_time,
                stats=ComparativeStats(
                    # positional access: row 0 = previous mean, row 1 = latest run
                    mean_1=values.iloc[0],
                    mean_2=values.iloc[1],
                    std_1=0,
                    std_2=0,
                    pvalue=1,
                ),
            )
            change_points_by_metric[column].append(change_point)

        return dataframe_list, change_points_by_metric

    def combine_and_average_runs(self, dataFrame: pd.DataFrame):
        """
        If more than 1 previous run, mean data together into 1 single row
        Combine with current run into 1 data frame (current run being -1 index)

        Numeric columns are averaged; every other column is turned into a
        comma-separated string of the previous runs' values.

        Args:
            dataFrame (pd.DataFrame): data to combine into 2 rows

        Returns:
            pd.Dataframe: data frame of most recent run and averaged previous runs
                (returned with a fresh index and otherwise unchanged when there
                are fewer than two rows)
        """
        # No previous runs to average
        if len(dataFrame.index) < 2:
            return dataFrame.reset_index(drop=True)

        last_row = dataFrame.tail(1)
        previous_runs = dataFrame.iloc[:-1]
        combined = {}

        for column in dataFrame.columns:
            column_values = previous_runs[column]
            # Positional lookup so a non-default index cannot break the check
            first_value = column_values.iloc[0]
            is_number = isinstance(first_value, (int, float, numpy.number)) and not isinstance(first_value, bool)
            if is_number:
                combined[column] = [column_values.mean()]
            else:
                # str() keeps the join from failing on non-string values
                combined[column] = [",".join(map(str, column_values.tolist()))]

        averaged_row = pd.DataFrame(combined)
        return pd.concat([averaged_row, last_row], ignore_index=True)

pkg/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
JSON="json"
77
TEXT="text"
88
JUNIT="junit"
9+
CMR="cmr"

pkg/runTest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def run(**kwargs: dict[str, Any]) -> dict[str, Any]:
5050
algorithm_name = cnsts.EDIVISIVE
5151
elif kwargs["anomaly_detection"]:
5252
algorithm_name = cnsts.ISOLATION_FOREST
53+
elif kwargs['cmr']:
54+
algorithm_name = cnsts.CMR
5355
else:
5456
return None
5557

pkg/utils.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,16 +258,33 @@ def process_test(
258258
shortener = pyshorteners.Shortener(timeout=10)
259259
merged_df["buildUrl"] = merged_df["uuid"].apply(
260260
lambda uuid: (
261-
shortener.tinyurl.short(buildUrls[uuid])
261+
shorten_url(shortener, buildUrls[uuid])
262262
if options["convert_tinyurl"]
263263
else buildUrls[uuid]
264-
) # pylint: disable = cell-var-from-loop
264+
)
265+
266+
# pylint: disable = cell-var-from-loop
265267
)
266268
#save the dataframe
267269
output_file_path = f"{options['save_data_path'].split('.')[0]}-{test['name']}.csv"
268270
match.save_results(merged_df, csv_file_path=output_file_path)
269271
return merged_df, metrics_config
270272

273+
def shorten_url(shortener: Any, uuids: str) -> str:
    """Shorten every build URL contained in a comma-separated string

    Args:
        shortener (Any): shortener object exposing ``tinyurl.short(url)``
        uuids (str): one build URL, or several build URLs joined by ","

    Returns:
        str: the shortened urls joined by ",", in the same order as the input
    """
    short_url_list = []
    for build_url in uuids.split(","):
        build_url = build_url.strip()
        # A blank segment (missing build URL) is kept as an empty entry instead
        # of being sent to the shortener, so positions stay aligned with the input.
        short_url_list.append(shortener.tinyurl.short(build_url) if build_url else "")
    return ",".join(short_url_list)
271288

272289
def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:
273290
"""Gets metadata of the run from each test

0 commit comments

Comments
 (0)