-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathJEPS.py
29 lines (24 loc) · 1.24 KB
/
JEPS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def update_history(resources, z):
    """Refresh each resource's state and record its history entry for step ``z``.

    For every resource in ``resources``:

    * ``resource.state`` is replaced by a copy of the state of its first
      unit (``resource.units[0].state.copy()``);
    * ``resource.h[z]`` is set to the pair
      ``(resource.prev_state, resource.last_action)``.

    Args:
        resources: iterable of resource objects exposing ``units``, ``h``,
            ``prev_state`` and ``last_action``.
        z: key/index under which the history entry is stored in ``h``.

    Returns:
        The same ``resources`` object that was passed in (mutated in place).
    """
    for res in resources:
        # Snapshot the first unit's state so later changes to the unit do
        # not alias the resource's own state.
        snapshot = res.units[0].state.copy()
        res.state = snapshot
        entry = (res.prev_state, res.last_action)
        res.h[z] = entry
    return resources
def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA, STACT):
    """Reinforce the actions recorded in ``resource.h`` in the resource's policy.

    For every step ``z`` in ``range(time_max - 1)`` the pair
    ``(state, action)`` is read from ``resource.h[z]``.  Steps whose action
    is ``None`` are skipped.  Otherwise, for each job contained in the
    state:

    * if the job is the recorded action, its probability ``p`` is moved
      towards 1: ``p + GAMMA * (1 - p)``;
    * any other job has *its own* probability scaled down:
      ``(1 - GAMMA) * p``.

    When the state's jobs are distinct and their probabilities sum to 1,
    this update keeps the sum at 1.

    Args:
        resource: object exposing ``h`` (indexable by step, each entry
            indexable as ``[0]`` = state and ``[1]`` = action) and
            ``policy``.
        states: sequence of known states; ``states.index(state)`` gives the
            policy row.
        actions: sequence of known actions/jobs; ``actions.index(job)``
            gives the policy column.
        r_best: unused by this function; kept for interface compatibility.
        time_max: number of steps; history entries ``0 .. time_max - 2``
            are processed.
        GAMMA: learning rate applied to the probabilities.
        STACT: ``"st_act"`` to address the policy as
            ``policy[state_index, action_index]``, ``"act"`` to address it
            as ``policy[action_index]``.  Any other value leaves the policy
            untouched.

    Returns:
        The same ``resource`` object (its ``policy`` is mutated in place).

    Raises:
        ValueError: propagated from ``.index`` when a recorded state, the
            recorded action, or a job of the state is not found in
            ``states`` / ``actions`` (assuming these are lists or tuples).
    """
    for z in range(time_max - 1):
        step = resource.h[z]
        s = step[0]
        a = step[1]
        if a is None:
            # Nothing was chosen at this step, so there is nothing to learn.
            continue

        s_index = states.index(s)   # previous state
        a_index = actions.index(a)  # taken action

        for job in s:
            is_chosen = job == a
            # Each job updates its OWN policy entry: decaying the chosen
            # action's entry for every other job would undo the
            # reinforcement and leave the alternatives untouched.
            job_index = a_index if is_chosen else actions.index(job)

            if STACT == "st_act":
                key = (s_index, job_index)
            elif STACT == "act":
                key = job_index
            else:
                continue

            p = resource.policy[key]
            if is_chosen:
                resource.policy[key] = p + (GAMMA * (1 - p))
            else:
                resource.policy[key] = (1 - GAMMA) * p
    return resource