# environment.py

import random

import numpy as np
import pandas as pd
import torch

from vars import *
from DDPG import *
class System:
    """Aggregator environment: a set of buildings whose heat pumps react to a
    broadcast price signal. The state exposed to the pricing agent contains the
    ambient temperature, the total base load, the mean inside temperatures of
    the two building groups and the time of day."""

    def __init__(self, eval=False, num_buildings=NUM_BUILDINGS, zeta=ZETA,
                 january=False, RL_building=True, continuous=False, spot=False):
        self.eval = eval
        self.january = january
        self.spot = spot
        # In eval mode, evaluate either on January or on a fixed day at the start of November
        if self.eval:
            if self.january:
                self.random_day = 0  # First day of the year
            else:
                # np.random.seed(42)  # Otherwise always the same day is selected for evaluation
                self.random_day = 304 * 24  # random.randint(304, 365 - NUM_HOURS // 24 - 1) * 24
                # Evaluate on a 60-day horizon; this rebinds the module-level
                # constant imported from vars for the rest of this module.
                global NUM_HOURS
                NUM_HOURS = 60 * 24
        else:
            # For training, select a random day in November/December
            self.random_day = random.randint(304, 365 - NUM_HOURS // 24 - 1) * 24
        # Ambient temperature series for the episode, indexed by hour of year
        self.ambient_temperatures = pd.read_csv(
            '../heating-RL-agent/data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
            header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 2]
        self.ambient_temperature = self.ambient_temperatures[self.random_day]
        # Based on the same day, choose the sun irradiation for the episode
        self.sun_powers = pd.read_csv(
            '../heating-RL-agent/data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
            header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 3]
        self.continuous = continuous
        self.RL_building = RL_building
        self.num_buildings = num_buildings
        # Pre-trained DDPG heating agents (checkpoints trained for 19 °C and 19.5 °C setpoints)
        self.brain_19 = torch.load('data/environment/heating-RL-agentDDPG-1h-19_more_responsive.pt',
                                   map_location=torch.device('cpu'))
        self.brain_19_5 = torch.load('data/environment/heating-RL-agentDDPG-1h-19.5_more_responsive.pt',
                                     map_location=torch.device('cpu'))
        self.brains = [self.brain_19, self.brain_19_5]
        self.buildings = [Building(random_day=self.random_day,
                                   ambient_temperatures=self.ambient_temperatures,
                                   sun_powers=self.sun_powers,
                                   name='Building_{}'.format(b), seed=b,
                                   RL_building=self.RL_building, brains=self.brains)
                          for b in range(num_buildings)]
        self.zeta = zeta
        self.done = False
        self.time = 0
        self.total_power_cost = 0
        self.heat_disutility = 0
        self.total_load = sum(self.buildings[i].base_load for i in range(num_buildings))
    def step(self, action):
        """Broadcast a price to all buildings, advance one time step and
        return the new state, the reward and the done flag."""
        if self.continuous:
            price = action * MAX_PRICE
        elif self.spot:
            price = action
        else:
            price = PRICE_SET[int(action)]
        total_load, total_base_load, building_costs, total_power_cost = self.get_loads_and_costs(price)
        self.total_load = total_load
        self.total_power_cost = total_power_cost
        self.heat_disutility = building_costs - total_power_cost
        self.ambient_temperature = self.ambient_temperatures[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
        r = self.reward(total_load, building_costs)
        self.time += 1
        if self.time >= NUM_TIME_STEPS:
            self.done = True
        # Mean inside temperature of each comfort group (first half / second half of the buildings)
        mean_temp_19 = np.mean([self.buildings[b].inside_temperature for b in range(self.num_buildings // 2)])
        mean_temp_19_5 = np.mean([self.buildings[b].inside_temperature
                                  for b in range(self.num_buildings // 2, self.num_buildings)])
        return [self.ambient_temperature, total_base_load, mean_temp_19, mean_temp_19_5,
                self.time % int(24 * 3600 // TIME_STEP_SIZE)], r, self.done
    def get_loads_and_costs(self, price):
        """Accumulate the load, base load, total cost and power cost over all buildings."""
        total_load = 0
        total_base_load = 0
        total_cost = 0
        total_power_cost = 0
        for building in self.buildings:
            load, base_load, cost, power_cost = building.step(price)
            total_load += load
            total_base_load += base_load
            total_cost += cost
            total_power_cost += power_cost
        return total_load, total_base_load, total_cost, total_power_cost
    def reward(self, total_load, building_costs):
        # Penalise the aggregated load exceeding the grid capacity L_MAX, then
        # trade off building costs against the load penalty with the weight zeta.
        penalty = LOAD_PENALTY * np.maximum(0, total_load - L_MAX)
        return -self.zeta * building_costs - (1 - self.zeta) * penalty
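
    # Worked example of the reward (illustrative numbers only, not values from
    # vars.py): with zeta = 0.5, building_costs = 10, total_load = 1.2, L_MAX = 1.0
    # and LOAD_PENALTY = 100, the reward is
    #   -0.5 * 10 - 0.5 * 100 * max(0, 1.2 - 1.0) = -5 - 10 = -15.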
    def reset(self):
        """Reset the episode: redraw the day (in training mode), reload the
        weather series, reset all buildings and return the initial state."""
        if self.eval:
            if self.january:
                self.random_day = 0  # First day of the year
        else:
            # Select a random day in November/December for training
            self.random_day = random.randint(304, 365 - NUM_HOURS // 24 - 1) * 24
        self.ambient_temperatures = pd.read_csv(
            '../heating-RL-agent/data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
            header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 2]
        self.ambient_temperature = self.ambient_temperatures[self.random_day]
        # Based on the same day, choose the sun irradiation for the episode
        self.sun_powers = pd.read_csv(
            '../heating-RL-agent/data/environment/ninja_weather_55.6838_12.5354_uncorrected.csv',
            header=3).iloc[self.random_day:self.random_day + NUM_HOURS + 1, 3]
        self.done = False
        self.time = 0
        total_load = 0
        self.total_power_cost = 0
        for building in self.buildings:
            total_load += building.reset(self.random_day, self.ambient_temperatures, self.sun_powers)
        mean_temp_19 = np.mean([self.buildings[b].inside_temperature for b in range(self.num_buildings // 2)])
        mean_temp_19_5 = np.mean([self.buildings[b].inside_temperature
                                  for b in range(self.num_buildings // 2, self.num_buildings)])
        return [self.ambient_temperature, total_load, mean_temp_19, mean_temp_19_5,
                self.time % int(24 * 3600 // TIME_STEP_SIZE)]
class Building:
    """This class represents a building that has to be controlled. Its dynamics are
    modelled with an RC (resistor-capacitor) analogy. On instantiation, the inside
    temperature is initialised uniformly at random between 20.5 °C and 21.5 °C and
    the time variable is reset.
    """
    def __init__(self, random_day, ambient_temperatures, sun_powers, name='', seed=0,
                 RL_building=True, brains=None):
        self.random_day = random_day
        self.ambient_temperatures = ambient_temperatures
        self.ambient_temperature = self.ambient_temperatures[random_day]
        self.sun_powers = sun_powers
        self.sun_power = self.sun_powers[random_day]
        self.name = name
        self.time = 0
        # Hourly base (non-heating) electrical load for the episode
        self.base_loads = pd.read_csv(
            '../multi-building-RL/data/environment/2014_DK2_scaled_loads.csv',
            header=0).iloc[random_day:random_day + NUM_HOURS + 1, 1]
        self.seed = seed
        np.random.seed(self.seed)
        self.RL_building = RL_building
        if RL_building:
            # The first half of the buildings uses the 19 °C brain with a lowered
            # minimum temperature; the second half uses the 19.5 °C brain.
            if seed < NUM_BUILDINGS // 2:
                self.brain = brains[0]
                self.T_MIN = T_MIN - 0.5
            else:
                self.brain = brains[1]
                self.T_MIN = T_MIN
            # Evaluate the pre-trained policy deterministically
            self.brain.add_noise = False
            self.brain.epsilon = 0
            self.brain.eps_end = 0
        else:
            self.T_MIN = T_MIN
        # Per-building noise on the base load
        self.base_loads += np.random.normal(loc=0.0, scale=0.075 / 1000, size=NUM_HOURS + 1)
        self.base_load = self.base_loads[random_day]
        self.inside_temperature = 20.5 + np.random.rand()
        self.action = 0
    def heat_pump_power(self, phi_e):
        """Convert an electrical power flow into a heat flow, using a COP that
        increases linearly with the ambient temperature.

        :param phi_e: The electrical power
        :type phi_e: float
        :return: The heat flow as a float
        """
        return phi_e * (0.0606 * self.ambient_temperature + 2.612)
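
    # Example (illustrative): at an ambient temperature of 0 °C the COP is 2.612,
    # so 1 kW of electrical power yields about 2.6 kW of heat; at 10 °C the COP
    # rises to roughly 0.0606 * 10 + 2.612 ≈ 3.2.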
    def step(self, price):
        """Apply the broadcast price, let the building choose a heating action and
        advance its thermal state by one time step.

        :param price: The price signal broadcast by the aggregator
        :type price: float
        :return: The total load, the base load, the total cost and the power cost
        """
        current_penalty = COMFORT_PENALTY * (np.maximum(0, self.T_MIN - self.inside_temperature)
                                             + np.maximum(0, self.inside_temperature - T_MAX))
        current_temperature_deficit = np.maximum(0, self.T_MIN - self.inside_temperature)
        if self.RL_building:
            # Let the pre-trained DDPG policy pick the heating action
            state = torch.tensor([self.inside_temperature, self.ambient_temperature,
                                  self.sun_power, price], dtype=torch.float).to(device)
            state = self.brain.normalizer.normalize(state).unsqueeze(0)
            selected_action = self.brain.select_action(state).type(torch.FloatTensor).item()
        else:
            # Rule-based fallback: heat less when the price is high, more when the
            # inside temperature drops below T_MIN or the ambient temperature is low
            if current_temperature_deficit == 0:
                selected_action = np.maximum(0, (PRICE_SET[-1] - 30 - price) / (PRICE_SET[-1] - 20))
            elif (self.ambient_temperature >= 0) and (current_temperature_deficit <= 1.5):
                selected_action = (PRICE_SET[-1] - price) / (PRICE_SET[-1] - 10)
            elif self.ambient_temperature >= -2:
                selected_action = (PRICE_SET[-1] - 5 - 0.5 * price) / (PRICE_SET[-1] - 10)
            else:
                selected_action = (PRICE_SET[-1] - 8 - 0.2 * price) / (PRICE_SET[-1] - 10)
        self.action = selected_action
        # Alternative: choose the action minimising the expected costs
        # expected_costs = self.compute_expected_costs(price)
        # selected_action = HEATING_SETTINGS[np.argmin(expected_costs)]
        # Explicit Euler step of the RC dynamics plus a small random disturbance
        delta = (1 / (R_IA * C_I) * (self.ambient_temperature - self.inside_temperature)
                 + self.heat_pump_power(NOMINAL_HEAT_PUMP_POWER * selected_action) / C_I
                 + A_w * self.sun_power / C_I)
        self.inside_temperature += delta * TIME_STEP_SIZE + 0.2 * np.random.rand() - 0.1
        # Heat pump energy expressed in MWh and scaled to the time slot size
        heat_pump_power = selected_action * NOMINAL_HEAT_PUMP_POWER / 1e6 * TIME_STEP_SIZE / 3600
        total_load = heat_pump_power + self.base_load * TIME_STEP_SIZE / 3600
        power_cost = total_load * price
        total_cost = power_cost * PRICE_PENALTY + current_penalty
        self.time += 1
        self.ambient_temperature = self.ambient_temperatures[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
        self.sun_power = self.sun_powers[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
        self.base_load = self.base_loads[self.random_day + (self.time * TIME_STEP_SIZE) // 3600]
        return total_load, self.base_load, total_cost, power_cost
    def reset(self, random_day, ambient_temperatures, sun_powers):
        """Reset the attributes of the building for a new episode.

        :return: The base load at the start of the episode
        """
        np.random.seed(self.seed)
        self.inside_temperature = 20.5 + np.random.rand()
        self.random_day = random_day
        self.ambient_temperatures = ambient_temperatures
        self.ambient_temperature = self.ambient_temperatures[random_day]
        self.sun_powers = sun_powers
        self.sun_power = self.sun_powers[random_day]
        self.time = 0
        self.base_loads = pd.read_csv(
            '../multi-building-RL/data/environment/2014_DK2_scaled_loads.csv',
            header=0).iloc[random_day:random_day + NUM_HOURS + 1, 1]
        self.base_loads += np.random.normal(loc=0.0, scale=0.075 / 1000, size=NUM_HOURS + 1)
        self.base_load = self.base_loads[random_day]
        return self.base_load
    def compute_expected_costs(self, price):
        """For every available heating setting, predict the next inside temperature
        and return the list of expected costs (heat disutility plus heating cost).
        Only referenced from commented-out code in step(); kept as an alternative
        to the RL and rule-based policies."""
        costs = []
        for heating_action in HEATING_SETTINGS:
            expected_temperature_delta = (1 / (R_IA * C_I) * (self.ambient_temperature - self.inside_temperature)
                                          + self.heat_pump_power(NOMINAL_HEAT_PUMP_POWER * heating_action) / C_I
                                          + A_w * self.sun_power / C_I)
            expected_temperature = self.inside_temperature + expected_temperature_delta * TIME_STEP_SIZE
            expected_heat_disutility = COMFORT_PENALTY * np.maximum(0, self.T_MIN - expected_temperature)
            expected_heating_cost = (PRICE_SENSITIVITY * heating_action * NOMINAL_HEAT_PUMP_POWER
                                     / 1e6 * price * TIME_STEP_SIZE / 3600)
            costs.append(expected_heat_disutility + expected_heating_cost)
        return costs
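

# A minimal usage sketch (an assumption, not part of the original file): it
# presumes vars.py defines NUM_BUILDINGS, NUM_TIME_STEPS and PRICE_SET, and that
# the weather/load CSVs and the pre-trained DDPG checkpoints exist at the paths
# above. It drives the environment with a random discrete price at every step.
if __name__ == '__main__':
    env = System(num_buildings=NUM_BUILDINGS)
    state = env.reset()
    episode_reward = 0
    done = False
    while not done:
        action = random.randint(0, len(PRICE_SET) - 1)  # random price index
        state, reward, done = env.step(action)
        episode_reward += reward
    print('Episode finished after {} steps, total reward: {:.2f}'.format(env.time, episode_reward))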