-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidation.py
66 lines (60 loc) · 2.57 KB
/
validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Code to produce validation stats and graphs.
Jamie Taylor
Fariba Yousefi
2022-05-03
"""
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
import seaborn as sns
# Seed NumPy's global random number generator at import time so that the random
# selection of regions made with np.random.choice in plot_timeseries is
# repeatable from one run to the next.
np.random.seed(42)
def predicted_vs_actual_ratio(pes_gsp_data):
    """
    Scatter the predicted GSP:PES ratio against the actual ratio for the test set.

    Only rows whose ``set`` column equals "test" are plotted. A straight line
    is fitted through the same points with ``scipy.stats.linregress`` and its
    slope, intercept and r-squared are written onto the figure. The figure is
    left open for the caller to show or save; nothing is returned.

    Parameters
    ----------
    pes_gsp_data :
        Table providing the columns ``set``, ``gsp_pes_ratio`` and
        ``predicted_GSP_PES_ratio``.
    """
    figure = plt.figure()

    # Select the test rows once and reuse them for both the scatter and the fit.
    is_test = pes_gsp_data.set == "test"
    actual = pes_gsp_data.loc[is_test, "gsp_pes_ratio"].to_numpy()
    predicted = pes_gsp_data.loc[is_test, "predicted_GSP_PES_ratio"]

    plt.scatter(actual, predicted, s=0.01)
    plt.xlim(-0.1, 1)
    plt.ylim(-0.1, 1)
    plt.xlabel("Actual GSP:PES demand ratio")
    plt.ylabel("Predicted GSP:PES demand ratio")

    fit = stats.linregress(actual, predicted.to_numpy())
    annotation = (
        f"Linear fit: y = {fit.slope:.2f}x + {fit.intercept:.2f}\n"
        f"(r-squared: {fit.rvalue**2:.3f})"
    )
    # Coordinates are in figure-relative units (0-1 across the whole figure).
    figure.text(.7, .5, annotation, ha='center')
def plot_timeseries(pes_gsp_data, start="2018-07-01", end="2018-07-20", n_gsps=10):
    """
    Plot actual and predicted GSP meter volume over time for random test regions.

    Regions are drawn without replacement (using NumPy's global random state)
    from the distinct ``region_id_20210423`` values found in rows whose ``set``
    column equals "test". For each chosen region one figure is created showing
    ``gsp_meter_volume`` ("Actual") and ``predicted_GSP_meter_volume``
    ("Predicted") against ``timestamp``, restricted to ``start <= timestamp
    <= end``. Figures are left open for the caller to show or save.

    Parameters
    ----------
    pes_gsp_data :
        Table providing the columns ``set``, ``region_id_20210423``,
        ``timestamp``, ``gsp_meter_volume`` and ``predicted_GSP_meter_volume``.
    start, end :
        Inclusive bounds compared directly against the ``timestamp`` column.
    n_gsps : int
        Maximum number of regions to plot. If fewer test regions exist, all of
        them are plotted instead of raising an error.
    """
    # NOTE: this changes matplotlib's process-wide default, so every figure
    # created after this call is also rendered at 250 dpi. Kept as-is because
    # later plots may rely on it.
    plt.rcParams['figure.dpi'] = 250

    is_test = pes_gsp_data.set == "test"
    ids = pes_gsp_data.loc[is_test, "region_id_20210423"].unique()
    if len(ids) == 0:
        # No test regions: nothing to sample from, so nothing to plot.
        return

    # Sampling without replacement cannot draw more items than are available.
    n_regions = min(n_gsps, len(ids))

    # The set/time-window part of the filter does not depend on the region.
    in_window = (
        is_test
        & (pes_gsp_data.timestamp >= start)
        & (pes_gsp_data.timestamp <= end)
    )

    for region_id in np.random.choice(ids, size=n_regions, replace=False):
        plotdata = pes_gsp_data.loc[
            in_window & (pes_gsp_data.region_id_20210423 == region_id)
        ].sort_values("timestamp")

        # Create the axes explicitly and hand them to both plot calls.
        # DataFrame.plot opens a figure of its own when no ``ax`` is given, so
        # a bare plt.figure() here would be left behind as an empty figure.
        _, ax = plt.subplots()
        plotdata.plot(x="timestamp", y="gsp_meter_volume", label="Actual", ax=ax)
        plotdata.plot(x="timestamp", y="predicted_GSP_meter_volume", label="Predicted", ax=ax)
        ax.set_xlabel("Timestamp", fontsize=16)
        ax.set_ylabel("GSP demand (MW)", fontsize=16)
        ax.set_title(f"Region {region_id}", fontsize=30)
def histogram_residuals(pes_gsp_data):
    """
    Draw histograms of the test-set residuals, absolute and normalised.

    The residual is ``predicted_GSP_meter_volume - gsp_meter_volume`` for rows
    whose ``set`` column equals "test"; the normalised residual divides that by
    ``gsp_meter_volume``. Non-finite values (NaN or +/-inf, e.g. from dividing
    by zero) are dropped before plotting. A new figure is opened before each of
    the two 30-bin histograms; nothing is returned.

    Parameters
    ----------
    pes_gsp_data :
        Table providing the columns ``set``, ``gsp_meter_volume`` and
        ``predicted_GSP_meter_volume``.
    """
    test_rows = pes_gsp_data.loc[pes_gsp_data.set == "test"]
    actual = test_rows["gsp_meter_volume"]
    error = test_rows["predicted_GSP_meter_volume"] - actual
    relative_error = error / actual

    # Absolute residuals first, then residuals relative to the actual value.
    for values in (error, relative_error):
        plt.figure()
        values[np.isfinite(values)].hist(bins=30)
def run_validation(pes_gsp_data):
    """
    Produce every validation plot for ``pes_gsp_data`` and display them.

    Runs, in order, the predicted-vs-actual ratio scatter, the per-region
    timeseries plots and the residual histograms, then calls ``plt.show()``
    once so all figures are displayed together.
    """
    validation_plots = (
        predicted_vs_actual_ratio,
        plot_timeseries,
        histogram_residuals,
    )
    for make_plots in validation_plots:
        make_plots(pes_gsp_data)
    plt.show()