-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature_drift_result_hyp.py
147 lines (93 loc) · 3.74 KB
/
feature_drift_result_hyp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import pandas as pd
import numpy as np
from river import drift
from scipy import stats
def custom_cd(features, drift_index, mannu, w_size, confidence,
              *, ref_start=1900, test_start=2000, test_end=6000):
    """Detect per-feature drift with a two-sample test on a sliding window.

    For every feature column a fixed reference sample is taken from rows
    ``[ref_start, test_start)``.  Rows ``[test_start, test_end)`` are then
    streamed through a sliding window of ``w_size`` values per feature; each
    time a window is full it is compared against the reference sample with
    ``mannu`` before the newest value is pushed in.

    Args:
        features: 2-D sequence indexed as ``features[row][column]``.
        drift_index: first row index at which a detection counts as a true
            positive; earlier detections are counted as false positives.
        mannu: callable ``mannu(test_window, reference_window)`` whose
            result is indexable and carries the p-value at position ``1``.
        w_size: number of values held in each sliding test window.
        confidence: a detection is raised when the p-value is strictly
            below this threshold.
        ref_start: first row of the reference sample.
        test_start: first streamed row (also the end of the reference sample).
        test_end: one past the last streamed row.

    Returns:
        ``(fp_counts, tps)``: per-feature false-positive counts and
        per-feature true-positive row indices.  The function returns at the
        first true positive, so at most one entry of ``tps`` is not ``None``.
        Empty ``features`` yields ``([], [])``.
    """
    if len(features) == 0:
        return [], []

    # Size every per-feature structure from the data itself so that inputs
    # with a column count other than 10 are handled.
    n_features = len(features[0])
    fp_counts = [0] * n_features
    tps = [None] * n_features

    # Column-wise reference sample.
    reference_windows = [[] for _ in range(n_features)]
    for row in features[ref_start:test_start]:
        for f_idx in range(n_features):
            reference_windows[f_idx].append(row[f_idx])

    test_windows = [[] for _ in range(n_features)]
    for idx, row in enumerate(features[test_start:test_end], start=test_start):
        for f_idx, feature in enumerate(row):
            window = test_windows[f_idx]
            if len(window) == w_size:
                # The window is tested *before* the current value enters it.
                p_value = mannu(window, reference_windows[f_idx])[1]
                if p_value < confidence:
                    if idx >= drift_index:
                        tps[f_idx] = idx
                        return fp_counts, tps
                    fp_counts[f_idx] += 1
                # Slide: drop the oldest value to make room for the new one.
                window.pop(0)
            window.append(feature)
    return fp_counts, tps
def river_cd(features, drift_index, cd_detectors, *, start=1900, end=6000):
    """Stream feature rows through one drift detector per feature column.

    Rows ``[start, end)`` of ``features`` are visited in order.  Each value
    is passed to the detector at the same column position via ``update``;
    the detector's ``drift_detected`` attribute is read right afterwards.

    Args:
        features: 2-D sequence indexed as ``features[row][column]``.
        drift_index: first row index at which a detection counts as a true
            positive; earlier detections are counted as false positives.
        cd_detectors: sequence of detector objects, indexed by column, each
            exposing ``update(value)`` and ``drift_detected``.
        start: first streamed row.
        end: one past the last streamed row.

    Returns:
        ``(fp_counts, tps)``: per-feature false-positive counts and
        per-feature true-positive row indices.  The function returns at the
        first true positive, so at most one entry of ``tps`` is not ``None``.
        Empty ``features`` yields ``([], [])``.
    """
    if len(features) == 0:
        return [], []

    n_features = len(features[0])
    fp_counts = [0] * n_features
    tps = [None] * n_features

    for idx, row in enumerate(features[start:end], start=start):
        for f_idx, feature in enumerate(row):
            detector = cd_detectors[f_idx]
            detector.update(feature)
            if not detector.drift_detected:
                continue
            if idx >= drift_index:
                # First detection at or after the real drift ends the run.
                tps[f_idx] = idx
                return fp_counts, tps
            fp_counts[f_idx] += 1
    return fp_counts, tps
def error_rate_drift(dataset_name, drift_index):
    """Run each drift detector over five feature files and print the results.

    For runs 1 to 5 the matrix ``datasets/features_{dataset_name}_{run}.npy``
    is loaded and fed to, in this order: DDM, ADWIN and KSWIN (through
    ``river_cd``) and a windowed two-sample test (through ``custom_cd``).
    All five runs are processed for one detector before moving on to the
    next.  For every run the detector label, the per-feature false-positive
    counts and the per-feature true-positive indices are printed.

    Args:
        dataset_name: name fragment used to build the ``.npy`` file path.
        drift_index: row index of the real drift, forwarded to the detectors.
    """
    runs = range(1, 6)

    def load_features(run):
        # One feature matrix per run, stored under datasets/.
        return np.load(f"datasets/features_{dataset_name}_{run}.npy")

    def report(label, fp_counts, tps):
        print(label)
        print(fp_counts)
        print(tps)

    # Detectors are stateful, so a fresh set (one per feature column) is
    # built for every run instead of being shared across runs.
    for run in runs:
        print(run, "next dataset")
        features = load_features(run)
        # NOTE(review): river's binary DDM is documented for 0/1 inputs;
        # confirm the feature values are suitable.
        detectors = [drift.binary.DDM() for _ in range(len(features[0]))]
        report("DDM", *river_cd(features, drift_index, detectors))

    for run in runs:
        features = load_features(run)
        detectors = [drift.ADWIN() for _ in range(len(features[0]))]
        report("ADWIN", *river_cd(features, drift_index, detectors))

    for run in runs:
        features = load_features(run)
        detectors = [
            drift.KSWIN(window_size=200, stat_size=100)
            for _ in range(len(features[0]))
        ]
        report("KSWIN", *river_cd(features, drift_index, detectors))

    # NOTE(review): the printed label says "MANNU" but the test that is run
    # is the two-sample Kolmogorov-Smirnov test (stats.ks_2samp). Confirm
    # which one is intended; the label is kept so the output is unchanged.
    two_sample_test = stats.ks_2samp
    w_size = 100
    confidence = 0.005
    for run in runs:
        features = load_features(run)
        report(
            "MANNU",
            *custom_cd(features, drift_index, two_sample_test, w_size, confidence),
        )
def main():
    """Run the drift experiment on the "HYP_001" dataset, drift at row 5000."""
    error_rate_drift(dataset_name="HYP_001", drift_index=5000)
# Only run the experiment when executed as a script, not when imported.
if __name__ == "__main__":
    main()