
Commit 5192418

paigerube14 authored and Auto User committed
Getting percent difference
rh-pre-commit.version: 2.2.0 rh-pre-commit.check-secrets: ENABLED Signed-off-by: Auto User <auto@users.noreply.github.com>
1 parent 248893a commit 5192418

9 files changed: +175 −10 lines changed


README.md

Lines changed: 6 additions & 1 deletion
@@ -123,6 +123,11 @@ Additionally, users can specify a custom path for the output CSV file using the
 
 Orion now supports anomaly detection for your data. Use the ```--anomaly-detection``` command to start the anomaly detection process.
 
+To find significant percent differences between workload runs, use the ```--cmr``` command. This compares the most recent run with any previous matching runs or baseline UUIDs. If more than one earlier run is found, their values are averaged together and then compared against the most recent run. Use *direction: 0* (set in the config) together with the ```-o json``` output format to see the percent differences.
+
+![cmr percent difference](percentdiff.jpg)
+
 You can now constrain your look-back period using the ```--lookback``` option. The format for look-back is ```XdYh```, where X represents the number of days and Y represents the number of hours.
 
 To specify how many runs to look back, you can use the ```--lookback-size``` option. By default, this option is set to 10000.
@@ -156,7 +161,7 @@ This is similar to how car manufacturers warranty plays out such as 5years or 60
 
 You can open the match requirement by using the ```--node-count``` option to find any matching uuid based on the metadata and not have to have the same jobConfig.jobIterations. This variable is a ```True``` or ```False```, defaulted to False.
 
-**_NOTE:_** The ```--hunter-analyze``` and ```--anomaly-detection``` flags are mutually exclusive. They cannot be used together because they represent different algorithms designed for distinct use cases.
+**_NOTE:_** The ```--cmr```, ```--hunter-analyze```, and ```--anomaly-detection``` flags are mutually exclusive. They cannot be used together because they represent different algorithms designed for distinct use cases.
 
 ### Daemon mode
 The core purpose of Daemon mode is to operate Orion as a self-contained server, dedicated to handling incoming requests. By sending a POST request accompanied by a test name of predefined tests, users can trigger change point detection on the provided metadata and metrics. Following the processing, the response is formatted in JSON, providing a structured output for seamless integration and analysis. To trigger daemon mode just use the following commands
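Purely as an illustration of the comparison the new README text describes (not Orion's actual code path, and with invented metric values), the ```--cmr``` behavior amounts to averaging the earlier runs and reporting the percent difference against the most recent one:

```python
# Hypothetical values: three earlier runs and one most recent run of a single metric.
previous_runs = [12000.0, 14000.0, 13000.0]
most_recent = 20000.0

baseline = sum(previous_runs) / len(previous_runs)            # 13000.0
percent_difference = (most_recent - baseline) / baseline * 100

print(f"percent difference vs. averaged previous runs: {percent_difference:.1f}%")  # ~53.8%
```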

orion.py

Lines changed: 10 additions & 2 deletions
@@ -69,6 +69,14 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
 
 # pylint: disable=too-many-locals
 @cli.command(name="cmd")
+@click.option(
+    "--cmr",
+    is_flag=True,
+    help="Generate percent difference in comparison",
+    cls=MutuallyExclusiveOption,
+    mutually_exclusive=["anomaly_detection","hunter_analyze"],
+)
+@click.option("--filter", is_flag=True, help="Generate percent difference in comparison")
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
 @click.option(
     "--save-data-path", default="data.csv", help="Path to save the output file"
@@ -79,7 +87,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
     is_flag=True,
     help="run hunter analyze",
     cls=MutuallyExclusiveOption,
-    mutually_exclusive=["anomaly_detection"],
+    mutually_exclusive=["anomaly_detection","cmr"],
 )
 @click.option("--anomaly-window", type=int, callback=validate_anomaly_options, help="set window size for moving average for anomaly-detection")
 @click.option("--min-anomaly-percent", type=int, callback=validate_anomaly_options, help="set minimum percentage difference from moving average for data point to be detected as anomaly")
@@ -88,7 +96,7 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
     is_flag=True,
     help="run anomaly detection algorithm powered by isolation forest",
     cls=MutuallyExclusiveOption,
-    mutually_exclusive=["hunter_analyze"],
+    mutually_exclusive=["hunter_analyze","cmr"],
 )
 @click.option(
     "-o",

percentdiff.jpg

27.5 KB

pkg/algorithms/algorithmFactory.py

Lines changed: 3 additions & 0 deletions
@@ -6,6 +6,7 @@
 import pkg.constants as cnsts
 from .edivisive import EDivisive
 from .isolationforest import IsolationForestWeightedMean
+from .cmr import CMR
 
 
 class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments, line-too-long
@@ -30,4 +31,6 @@ def instantiate_algorithm(self, algorithm: str, matcher: Matcher, dataframe:pd.D
             return EDivisive(matcher, dataframe, test, options, metrics_config)
         if algorithm == cnsts.ISOLATION_FOREST:
             return IsolationForestWeightedMean(matcher, dataframe, test, options, metrics_config)
+        if algorithm == cnsts.CMR:
+            return CMR(matcher, dataframe, test, options, metrics_config)
         raise ValueError("Invalid algorithm called")
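A hedged sketch of how the factory is exercised once ```--cmr``` resolves to ```cnsts.CMR```; the matcher, dataframe, test, options, and metrics_config variables are placeholders for objects Orion builds earlier in the run:

```python
# Placeholder objects; in Orion these come from the matcher/config machinery.
factory = AlgorithmFactory()
algorithm = factory.instantiate_algorithm(
    cnsts.CMR,               # "cmr", selected when --cmr is passed
    matcher,                 # fmatch Matcher for the datasource
    fingerprint_matched_df,  # dataframe of matched runs
    test,
    options,
    metrics_config,
)
```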

pkg/algorithms/cmr/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+"""
+Init for CMR Algorithm
+"""
+from .cmr import CMR

pkg/algorithms/cmr/cmr.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
+"""CMR - Comparing Mean Responses Algorithm"""
+
+# pylint: disable = line-too-long
+from typing import List
+import pandas as pd
+import numpy
+
+from fmatch.logrus import SingletonLogger
+from hunter.series import ChangePoint, ComparativeStats
+from pkg.algorithms.algorithm import Algorithm
+
+
+class CMR(Algorithm):
+    """Implementation of the CMR algorithm
+    Combines the metrics into 2 rows and compares them with a tolerance to set pass/fail
+
+    Args:
+        Algorithm (Algorithm): Inherits
+    """
+
+
+    def _analyze(self):
+        """Analyze the dataframe by averaging any previous data and generating the percent change against the current uuid
+
+        Returns:
+            series: data series that contains attributes and full dataframe
+            change_points_by_metric: list of ChangePoints
+        """
+        logger_instance = SingletonLogger.getLogger("Orion")
+        logger_instance.info("Starting analysis using CMR")
+        self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
+        self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9
+
+        if len(self.dataframe.index) == 1:
+            series = self.setup_series()
+            series.data = self.dataframe
+            return series, {}
+        # if larger than 2 rows, need to get the mean of 0 through -2
+        self.dataframe = self.combine_and_average_runs(self.dataframe)
+
+        series = self.setup_series()
+
+        df, change_points_by_metric = self.run_cmr(self.dataframe)
+        series.data = df
+        return series, change_points_by_metric
+
+
+    def run_cmr(self, dataframe_list: pd.DataFrame):
+        """
+        Generate the percent difference in a 2 row dataframe
+
+        Args:
+            dataframe_list (pd.DataFrame): two-row data frame holding the averaged
+                previous runs (row 0) and the most recent run (row 1)
+
+        Returns:
+            pd.DataFrame, dict[metric_name, ChangePoint]: Returned data frame and change points
+        """
+        metric_columns = self.metrics_config.keys()
+        change_points_by_metric = { k:[] for k in metric_columns }
+        max_date_time = pd.Timestamp.max.to_pydatetime()
+        max_time = max_date_time.timestamp()
+
+        for column in metric_columns:
+
+            change_point = ChangePoint(metric=column,
+                                       index=1,
+                                       time=max_time,
+                                       stats=ComparativeStats(
+                                           mean_1=dataframe_list[column][0],
+                                           mean_2=dataframe_list[column][1],
+                                           std_1=0,
+                                           std_2=0,
+                                           pvalue=1
+                                       ))
+            change_points_by_metric[column].append(change_point)

+        # based on change point generate pass/fail
+        return dataframe_list, change_points_by_metric
+
+    def combine_and_average_runs(self, dataFrame: pd.DataFrame):
+        """
+        If more than 1 previous run, mean the data together into 1 single row
+        Combine with current run into 1 data frame (current run being -1 index)
+
+        Args:
+            dataFrame (pd.DataFrame): data to combine into 2 rows
+
+        Returns:
+            pd.DataFrame: data frame of most recent run and averaged previous runs
+        """
+        i = 0
+
+        last_row = dataFrame.tail(1)
+        dF = dataFrame[:-1]
+        data2 = {}
+
+        metric_columns = list(dataFrame.columns)
+        for column in metric_columns:
+
+            if isinstance(dF.loc[0, column], (numpy.float64, numpy.int64)):
+                mean = dF[column].mean()
+                data2[column] = [mean]
+            else:
+                column_list = dF[column].tolist()
+                non_numeric_joined_list = ','.join(column_list)
+                data2[column] = [non_numeric_joined_list]
+            i += 1
+        df2 = pd.DataFrame(data2)
+
+        result = pd.concat([df2, last_row], ignore_index=True)
+        return result
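As a standalone sketch of what ```combine_and_average_runs``` produces (outside Orion's call path, with invented uuids and metric values): numeric columns of the earlier runs are averaged into one baseline row, non-numeric columns are comma-joined, and the most recent run becomes the second row:

```python
import pandas as pd

runs = pd.DataFrame({
    "uuid": ["run-a", "run-b", "run-c"],              # invented uuids
    "podReadyLatency_avg": [10.5, 12.5, 16.0],        # invented metric values
})

previous, latest = runs[:-1], runs.tail(1)

baseline_row = {
    "uuid": ",".join(previous["uuid"]),                              # "run-a,run-b"
    "podReadyLatency_avg": previous["podReadyLatency_avg"].mean(),   # 11.5
}
combined = pd.concat([pd.DataFrame([baseline_row]), latest], ignore_index=True)
print(combined)
# Row 0 is the averaged baseline, row 1 is the most recent run; run_cmr() then
# records mean_1 (row 0) and mean_2 (row 1) per metric inside a ChangePoint.
```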

pkg/constants.py

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@
 JSON="json"
 TEXT="text"
 JUNIT="junit"
+CMR="cmr"

pkg/runTest.py

Lines changed: 19 additions & 5 deletions
@@ -9,7 +9,24 @@
 import pkg.constants as cnsts
 from pkg.utils import get_datasource, process_test, get_subtracted_timestamp
 
+def get_algorithm_type(kwargs):
+    """Determine the algorithm name from the passed command-line flags
 
+    Args:
+        kwargs (dict): passed command line arguments
+
+    Returns:
+        str: algorithm name
+    """
+    if kwargs["hunter_analyze"]:
+        algorithm_name = cnsts.EDIVISIVE
+    elif kwargs["anomaly_detection"]:
+        algorithm_name = cnsts.ISOLATION_FOREST
+    elif kwargs['cmr']:
+        algorithm_name = cnsts.CMR
+    else:
+        algorithm_name = None
+    return algorithm_name
 
 def run(**kwargs: dict[str, Any]) -> dict[str, Any]: #pylint: disable = R0914
     """run method to start the tests
@@ -48,11 +65,8 @@ def run(**kwargs: dict[str, Any]) -> dict[str, Any]: #pylint: disable = R0914
         if fingerprint_matched_df is None:
             sys.exit(3)  # No data present
 
-        if kwargs["hunter_analyze"]:
-            algorithm_name = cnsts.EDIVISIVE
-        elif kwargs["anomaly_detection"]:
-            algorithm_name = cnsts.ISOLATION_FOREST
-        else:
+        algorithm_name = get_algorithm_type(kwargs)
+        if algorithm_name is None:
             return None, None
 
         algorithmFactory = AlgorithmFactory()
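A quick illustration of the new helper, with ```kwargs``` shaped the way click passes flag values when only ```--cmr``` is set (the dictionaries below are hypothetical):

```python
kwargs = {"hunter_analyze": False, "anomaly_detection": False, "cmr": True}
assert get_algorithm_type(kwargs) == cnsts.CMR      # "cmr"

kwargs = {"hunter_analyze": False, "anomaly_detection": False, "cmr": False}
assert get_algorithm_type(kwargs) is None           # run() then returns None, None
```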

pkg/utils.py

Lines changed: 19 additions & 2 deletions
@@ -258,17 +258,34 @@ def process_test(
     shortener = pyshorteners.Shortener(timeout=10)
     merged_df["buildUrl"] = merged_df["uuid"].apply(
         lambda uuid: (
-            shortener.tinyurl.short(buildUrls[uuid])
+            shorten_url(shortener, buildUrls[uuid])
             if options["convert_tinyurl"]
             else buildUrls[uuid]
-        ) # pylint: disable = cell-var-from-loop
+        )
+
+        # pylint: disable = cell-var-from-loop
     )
     merged_df=merged_df.reset_index(drop=True)
     #save the dataframe
     output_file_path = f"{options['save_data_path'].split('.')[0]}-{test['name']}.csv"
     match.save_results(merged_df, csv_file_path=output_file_path)
     return merged_df, metrics_config
 
+def shorten_url(shortener: any, uuids: str) -> str:
+    """Shorten the url(s) when buildUrls holds a comma-separated list
+
+    Args:
+        shortener (any): shortener object to call tinyurl.short on
+        uuids (str): comma-separated string of build URLs to shorten
+
+    Returns:
+        str: a combined string of shortened urls
+    """
+    short_url_list = []
+    for buildUrl in uuids.split(","):
+        short_url_list.append(shortener.tinyurl.short(buildUrl))
+    short_url = ','.join(short_url_list)
+    return short_url
 
 def get_metadata_with_uuid(uuid: str, match: Matcher) -> Dict[Any, Any]:
     """Gets metadata of the run from each test
