"""
This wil just create a model for every sensor that is queried.
Some code is semipseudo code for untill InfluxDB is going.
"""
import os
import json
import influx_interact as ii
import clean as cl
import model_trainer as mt
import model_predict as mp
# load per-sensor settings from JSON files
with open("./threshold_ratios.json") as f:
    threshold_ratios = json.load(f)
with open("./time_step_sizes.json") as f:
    time_step_sizes = json.load(f)
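# Both JSON files are assumed to map a sensor's uniqueID to a scalar, e.g.
# (illustrative values only):
#   threshold_ratios.json: {"Campus Energy Centre Boiler B-1 Gas Pressure": 1.2, ...}
#   time_step_sizes.json:  {"Campus Energy Centre Boiler B-1 Gas Pressure": 24, ...}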
# artifact locations; set these paths before running
MODEL_SAVE_LOC = None  # where trained models are saved
PERCENTILE_SAVE_LOC = None  # where per-sensor loss percentiles are saved
SCALER_SAVE_LOC = None  # where fitted scalers are saved
SENSOR_LIST = [
"Campus Energy Centre Campus HW Main Meter Power",
"Campus Energy Centre Campus HW Main Meter Entering Water Temperature",
"Campus Energy Centre Campus HW Main Meter Flow",
"Campus Energy Centre Boiler B-1 Gas Pressure",
"Campus Energy Centre Boiler B-1 Exhaust O2",
]
# building(s) being evaluated; currently a single building name
building_list = "Campus Energy Centre"
# Define a few variables with the name of your bucket, organization, and token.
bucket = "MDS2021"
org = "UBC"
# UDL provides public users READ access to the InfluxDB 2.0 instance via this token
token = os.getenv("MDS2021")
url = "http://206.12.92.81:8086"
# create the InfluxDB client wrapper
influxdb = ii.influx_class(org, url, bucket, token)
# query the main bucket data (no anomaly labelling)
# NOTE: the measurement looks like it could be "Number" instead of "READINGS"
influx_data = influxdb.make_query(
    building_list,
    measurement="READINGS",
    id=SENSOR_LIST,
)
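# make_query is assumed to return a single DataFrame with at least DateTime,
# Value, ID, and manual_anomaly columns, inferred from how it is used below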
# split the query result into a dictionary of DataFrames, one per sensor
sensors_dict = cl.split_sensors(influx_data)
# create empty dictionaries to collect per-sensor data
normal_bucket = {}
abnormal_bucket = {}
for key, df in sensors_dict.items():
    print("Training for: {}".format(key))
    # delete the data about to be written, to prevent duplicate writes on the
    # same timestamps
    delete_api = influxdb.client.delete_api()
    min_time = df["DateTime"].values[0]
    max_time = df["DateTime"].values[-1]
    delete_api.delete(
        str(min_time) + "Z",
        str(max_time) + "Z",
        '_measurement="TRAINING_ANOMALY" AND uniqueID="{}"'.format(key),
        bucket=bucket,
        org=org,
    )
    # create a standardized column for each sensor in the main bucket
    sensors_dict[key]["Stand_Val"] = cl.std_val_train(
        df[["Value"]], sensors_dict[key]["ID"].any(), SCALER_SAVE_LOC
    )
    # train only on data points that were not flagged manually
    # (element-wise comparison; "is not True" tested the Series object itself)
    df = df[df["manual_anomaly"] != True]
    # create sliding-window sequences for training
    threshold_ratio = threshold_ratios[key]
    time_steps = time_step_sizes[key]
    window_size = time_steps
    x_train, y_train = mt.create_sequences(
        df["Stand_Val"], df["Stand_Val"], time_steps, window_size
    )
    x_eval, y_eval = mt.create_sequences(
        df["Stand_Val"], df["Stand_Val"], time_steps, 1
    )
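    # e.g. with time_steps = 3 and a stride of 1, a series [1, 2, 3, 4, 5]
    # yields the windows [1, 2, 3], [2, 3, 4], [3, 4, 5] (assumed behaviour
    # of create_sequences, inferred from the length arithmetic below)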
    # format needed for fit_models; the first argument is assumed to be the
    # sensor identifier (the original indexed the still-empty normal_bucket,
    # which would raise a KeyError)
    normal_dict = cl.model_parser(key, x_train, y_train, x_eval)
    # create a model file from the training data
    mt.fit_models(normal_dict, MODEL_SAVE_LOC, PERCENTILE_SAVE_LOC)
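    # fit_models is assumed to persist both the trained model and a
    # reconstruction-loss percentile; the latter is read back below via
    # cl.load_loss_percentile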
    # create sliding-window sequences for predicting on the training set;
    # all three arrays are trimmed to the same length (the original took one
    # fewer row for manual_anomaly, misaligning it with the timestamps)
    timestamps = df["DateTime"].tail(len(df) - x_train.shape[1] + 1).values
    val_nums = df["Value"].tail(len(df) - x_train.shape[1] + 1).values
    manual_anomaly = df["manual_anomaly"].tail(len(df) - x_train.shape[1] + 1).values
    loss_percentile = cl.load_loss_percentile(key, file_path=PERCENTILE_SAVE_LOC)
    threshold = loss_percentile * threshold_ratio
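    # e.g. a loss percentile of 0.05 and a threshold ratio of 1.5 give a
    # threshold of 0.075; points whose loss exceeds it are presumably flagged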
    # predict and format the predictions
    pred_df = mp.make_prediction(
        key,
        x_eval,
        timestamps,
        threshold,
        val_nums,
        MODEL_SAVE_LOC,
        anomaly_type="model_anomaly",
        manual_anomaly=manual_anomaly,
    )
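    # make_prediction is assumed to return a DataFrame containing at least the
    # uniqueID, val_num, model_anomaly, and manual_anomaly columns kept below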
    pred_df = pred_df[["uniqueID", "val_num", "model_anomaly", "manual_anomaly"]]
    # quick sanity check: counts of model-flagged vs manually flagged points
    print(pred_df.groupby("model_anomaly").count())
    print(pred_df.groupby("manual_anomaly").count())
    # write the labelled predictions back to InfluxDB
    influxdb.write_data(
        pred_df,
        "TRAINING_ANOMALY",
        tags=["uniqueID", "model_anomaly", "manual_anomaly"],
    )
influxdb.client.close()