Skip to content

Commit da31ecc

Browse files
authored
Merge pull request #44 from redBorder/add_dynamic_filters
PR-40: Add dynamic filters
2 parents 3c5e463 + d6361bb commit da31ecc

26 files changed

+513
-269
lines changed

.github/workflows/security.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
2828
- name: Install safety for security checks
2929
run: |
30+
pip install cffi
3031
pip install safety
3132
3233
- name: Run safety check

resources/src/__main__.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919

2020

2121
import os
22-
from logger.logger import logger
23-
from server.rest import APIServer, config
24-
from server.production import GunicornApp
25-
from redborder.rq import RqManager
22+
import sys
23+
24+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
25+
from resources.src.logger.logger import logger
26+
from resources.src.server.rest import APIServer, config
27+
from resources.src.server.production import GunicornApp
28+
from resources.src.redborder.rq import RqManager
2629

2730
class Outliers:
2831
def __init__(self) -> None:

resources/src/ai/outliers.py

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
import pandas as pd
3434
import tensorflow as tf
3535

36-
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
37-
from logger import logger
36+
from resources.src.logger import logger
3837

3938
class Autoencoder:
4039
"""
@@ -61,8 +60,11 @@ def __init__(self, model_file, model_config_file):
6160
loss_mult_minute (float): Extra penalty in the loss function for guessing wrong
6261
'minute' field.
6362
"""
64-
self.check_existence(model_file, model_config_file)
65-
63+
try:
64+
self.check_existence(model_file, model_config_file)
65+
except FileNotFoundError as e:
66+
logger.logger.error("Could not find the asked model files")
67+
raise e
6668
try:
6769
model_config = configparser.ConfigParser()
6870
model_config.read(model_config_file)
@@ -77,28 +79,24 @@ def __init__(self, model_file, model_config_file):
7779
self.num_window = int(general_section.get('NUM_WINDOWS', 0))
7880
self.loss_mult_metric = float(general_section.get('LOSS_MULT_METRIC', 0))
7981
self.loss_mult_minute = float(general_section.get('LOSS_MULT_MINUTE', 0))
80-
except FileNotFoundError:
81-
logger.logger.error(f"Error: Model file '{model_config_file}' not found.")
82-
except (OSError, ValueError) as e:
83-
logger.logger.error(f"Error loading model conif: {e}")
82+
except Exception as e:
83+
logger.logger.error(f"Could not load model conif: {e}")
84+
raise e
8485
try:
8586
self.model = tf.keras.models.load_model(
8687
model_file,
8788
compile=False
8889
)
89-
except FileNotFoundError:
90-
logger.logger.error(f"Error: Model file '{model_file}' not found.")
91-
except (OSError, ValueError) as e:
92-
logger.logger.error(f"Error loading the model: {e}")
90+
except Exception as e:
91+
logger.logger.error(f"Could not load model {e}")
92+
raise e
9393

9494
def check_existence(self, model_file, model_config_file):
9595
"""
9696
Check existence of model files and copy them if missing.
9797
9898
This function checks if the provided `model_file` and `model_config_file` exist in their
99-
respective paths. If they don't exist, it renames and copies the corresponding default
100-
files from the 'traffic.keras' and 'traffic.ini' files, which are expected to be located
101-
in the same directory as the target files.
99+
respective paths. If they don't exist, it raises an error.
102100
103101
Args:
104102
model_file (str): Path to the target model file.
@@ -107,12 +105,13 @@ def check_existence(self, model_file, model_config_file):
107105
- The full path to the model configuration file you want to check and potentially copy.
108106
"""
109107
if not os.path.exists(model_file):
110-
os.rename(f"{os.path.dirname(model_file)}/traffic.keras", model_file)
111-
shutil.copy(model_file, f"{os.path.dirname(model_file)}/traffic.keras")
108+
error_msg=f"Model file '{os.path.basename(model_file)}' not found"
109+
logger.logger.error(error_msg)
110+
raise FileNotFoundError(error_msg)
112111
if not os.path.exists(model_config_file):
113-
os.rename(f"{os.path.dirname(model_config_file)}/traffic.ini", model_config_file)
114-
shutil.copy(model_config_file, f"{os.path.dirname(model_config_file)}/traffic.ini")
115-
112+
error_msg=f"Model config file '{os.path.basename(model_config_file)}' not found"
113+
logger.logger.error(error_msg)
114+
raise FileNotFoundError(error_msg)
116115

117116
def rescale(self, data):
118117
"""
@@ -253,12 +252,20 @@ def compute_json(self, metric, raw_json):
253252
254253
Args:
255254
metric (string): the name of field being analyzed.
256-
raw_json (Json): druid Json response with the data.
255+
raw_json (dict): deserialized Json druid response with the data.
257256
258257
Returns:
259-
(Json): Json with the anomalies and predictions for the data with RedBorder
258+
(dict): deserialized Json with the anomalies and predictions for the data with RedBorder
260259
prediction Json format.
261260
"""
261+
if metric=="" or metric not in self.metrics:
262+
error_msg = f"Model has not a metric called {metric}"
263+
logger.logger.error(error_msg)
264+
raise ValueError(error_msg)
265+
if not raw_json:
266+
error_msg = f"Input data is empty"
267+
logger.logger.error(error_msg)
268+
raise ValueError(error_msg)
262269
threshold = self.avg_loss+5*self.std_loss
263270
data, timestamps = self.input_json(raw_json)
264271
predicted, loss = self.calculate_predictions(data)
@@ -289,7 +296,7 @@ def input_json(self, raw_json):
289296
Also returns the timestamps for each entry.
290297
291298
Args:
292-
raw_json (Json): druid Json response with the data.
299+
raw_json (dict): deserialized Json druid response with the data.
293300
294301
Returns:
295302
data (numpy.ndarray): transformed data.
@@ -320,7 +327,7 @@ def output_json(self, metric, anomalies, predicted):
320327
predicted (pandas.DataFrame): predictions made by the model.
321328
322329
Returns:
323-
(Json): Json with the anomalies and predictions for the data with RedBorder prediction
330+
(dict): deserialized Json with the anomalies and predictions for the data with RedBorder prediction
324331
Json format.
325332
"""
326333
predicted = predicted.copy()
@@ -335,9 +342,18 @@ def output_json(self, metric, anomalies, predicted):
335342

336343
@staticmethod
337344
def execute_prediction_model(data, metric, model_file, model_config):
338-
autoencoder = Autoencoder(model_file, model_config)
339-
result = autoencoder.compute_json(metric, data)
340-
return result
345+
try:
346+
autoencoder = Autoencoder(model_file, model_config)
347+
return autoencoder.compute_json(metric, data)
348+
except Exception as e:
349+
logger.logger.error("Couldn't execute model")
350+
return Autoencoder.return_error(e)
341351
@staticmethod
342352
def return_error(error="error"):
353+
"""
354+
Returns an adequate formatted JSON for whenever there is an error.
355+
356+
Args:
357+
error (string): message detailing what type of error has been fired.
358+
"""
343359
return { "status": "error", "msg":error }

resources/src/ai/shallow_outliers.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,14 @@
1717
# You should have received a copy of the GNU General Public License
1818
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1919

20+
import os
21+
import sys
2022
import numpy as np
2123
import pandas as pd
2224
from sklearn.ensemble import IsolationForest
2325

26+
from resources.src.logger import logger
27+
2428
class ShallowOutliers:
2529
"""
2630
Shallow AI model for outliers detection. Used whenever there is no deep learning model defined.
@@ -43,11 +47,18 @@ def predict(self, arr):
4347
smooth_arr (numpy.ndarray): 1D numpy array with the smoothed data. Same shape as arr.
4448
"""
4549
if len(arr) == 0:
46-
raise ValueError("Input array must be non empty")
50+
error_msg = "Input array must be non-empty"
51+
logger.logger.error(error_msg)
52+
raise ValueError(error_msg)
4753
if arr.ndim != 1:
48-
raise ValueError("Input array must be 1-dimensional")
54+
error_msg = "Input array must be 1-dimensional"
55+
logger.logger.error(error_msg)
56+
raise ValueError(error_msg)
4957
if not np.issubdtype(arr.dtype, np.number):
50-
raise ValueError("Input array must contain numerical data")
58+
error_msg = "Input array must contain numerical data"
59+
logger.logger.error(error_msg)
60+
raise ValueError(error_msg)
61+
5162
window_size = max(int(0.05 * len(arr)), min(len(arr), int(5 + np.log(len(arr)))))
5263
window_size += 1 if window_size % 2 == 0 else 0
5364
kernel = np.arange(1, window_size // 2 + 2, dtype=float)
@@ -115,10 +126,20 @@ def compute_json(self, raw_json):
115126

116127
@staticmethod
117128
def execute_prediction_model(data):
118-
shallow_outliers = ShallowOutliers()
119-
result = shallow_outliers.compute_json(data)
120-
return result
129+
try:
130+
result = ShallowOutliers.compute_json(data)
131+
except Exception as e:
132+
result = ShallowOutliers.return_error(e)
133+
raise
134+
finally:
135+
return result
121136

122137
@staticmethod
123138
def return_error(error="error"):
139+
"""
140+
Returns an adequate formatted JSON for whenever there is an error.
141+
142+
Args:
143+
error (string): message detailing what type of error has been fired.
144+
"""
124145
return { "status": "error", "msg":error }

resources/src/ai/traffic_filter.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "selector",
3+
"dimension": "sensor_name",
4+
"value": "ASR"
5+
}

resources/src/ai/trainer.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,13 @@
2828
'''
2929
import sys
3030
import datetime
31+
import numpy as np
3132
import configparser
3233
from datetime import datetime
3334
from tensorflow.keras.optimizers import AdamW
34-
try:
35-
from ai.outliers import Autoencoder
36-
except:
37-
from src.ai.outliers import Autoencoder
38-
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
35+
36+
from resources.src.ai.outliers import Autoencoder
37+
from resources.src.logger.logger import logger
3938

4039
"""
4140
This module extends the Autoencoder class to allow further training of the model.
@@ -73,10 +72,14 @@ def save_model(self, save_model_file, save_config_file):
7372
"""
7473
if os.path.exists(save_model_file):
7574
if not os.access(save_model_file, os.W_OK):
76-
raise PermissionError(f"Permission denied: Cannot overwrite '{save_model_file}'")
75+
error_msg = f"Permission denied: Cannot overwrite '{save_model_file}'"
76+
logger.logger.error(error_msg)
77+
raise PermissionError(error_msg)
7778
if os.path.exists(save_config_file):
7879
if not os.access(save_config_file, os.W_OK):
79-
raise PermissionError(f"Permission denied: Cannot overwrite '{save_config_file}'")
80+
error_msg = f"Permission denied: Cannot overwrite '{save_config_file}'"
81+
logger.logger.error(error_msg)
82+
raise PermissionError(error_msg)
8083
self.model.save(save_model_file)
8184
new_model_config = configparser.ConfigParser()
8285
new_model_config.add_section('Columns')
@@ -107,18 +110,18 @@ def data_augmentation(self, data):
107110
"""
108111
return data
109112

110-
def prepare_data_for_training(self, data, augment = False):
113+
def prepare_data_for_training(self, raw_data, augment = False):
111114
"""
112115
Prepares data to be used for training the model.
113116
114117
Args:
115-
data (numpy ndarray): data to be used for training.
116-
118+
raw_data (list): list with the response form one or more druid queries.
117119
augment (boolean): set to True to generate more data for training.
118120
119121
Returns:
120122
prep_data (numpy ndarray): transformed data for its use in the model.
121123
"""
124+
data = np.concatenate([self.input_json(query_json)[0] for query_json in raw_data], axis = 0)
122125
prep_data = self.rescale(data)
123126
if augment:
124127
#TODO actually augment data
@@ -132,7 +135,7 @@ def train(self, raw_data, epochs=20, batch_size=32, backup_path=None):
132135
Given a druid query response, it is fed to the model for training.
133136
134137
Args:
135-
raw_data (json): response form a druid query.
138+
raw_data (list): list with the response form one or more druid queries.
136139
epochs (int): how many times should the model train on the data.
137140
batch_size (int): how many slices should the model take at once
138141
for training.
@@ -142,8 +145,7 @@ def train(self, raw_data, epochs=20, batch_size=32, backup_path=None):
142145
backup_path = "./backups/"
143146
date = datetime.now().strftime("%y-%m-%dT%H:%M")
144147
self.save_model(f"{backup_path}{date}.keras",f"{backup_path}{date}.ini")
145-
data = self.input_json(raw_data)[0]
146-
prep_data = self.prepare_data_for_training(data)
148+
prep_data = self.prepare_data_for_training(raw_data)
147149
self.model.fit(x=prep_data, y=prep_data, epochs = epochs, batch_size = batch_size, verbose = 0)
148150
loss = self.model_loss(prep_data, self.model.predict(prep_data), single_value=False).numpy()
149151
self.avg_loss = 0.9*self.avg_loss + 0.1*loss.mean()

resources/src/config.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ schedule_hour=* * * * *
1414
epochs=20
1515
batch_size=32
1616
backup_path=./backups/
17-
target_sensors=FlowSensor
17+
#target_sensors=FlowSensor
18+
model_names=traffic
1819

1920
[Druid]
2021
druid_endpoint=http://x.x.x.x:8080/druid/v2/

resources/src/config/configmanager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def __init__(self, config_file):
2929
config_file (str): The path to the configuration file to read and write.
3030
"""
3131
self.config = configparser.ConfigParser()
32-
self.config.read(config_file)
32+
with open(config_file, 'r') as file:
33+
self.config.read_file(file)
3334

3435
def get(self, section, option):
3536
"""

resources/src/druid/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def execute_query(self, druid_query):
4040
4141
Returns:
4242
dict: The response from the Druid query in JSON format.
43-
43+
4444
Raises:
4545
Exception: If the Druid query fails with a non-200 status code.
4646
"""

0 commit comments

Comments
 (0)