-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathQ_learning.py
75 lines (65 loc) · 2.61 KB
/
Q_learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def update_policy_Q(resources, states, actions, STACT, ALPHA, GAMMA):
    """Apply one Q-learning style update to the policy table of each resource.

    Every resource first gets its ``state`` refreshed from a copy of the state
    of its first unit. If the resource has a recorded ``last_action``, the
    table entry for that action is blended with the target
    ``reward + ALPHA * next_max`` and the reward is cleared afterwards.

    Note that, as written, ``GAMMA`` is the blending factor (step size)
    between the old entry and the target, while ``ALPHA`` weights the
    follow-up value (discount).

    Args:
        resources: Iterable of resource objects exposing ``units``, ``state``,
            ``prev_state``, ``last_action``, ``policy`` and ``reward``.
        states: Sequence of states; positions are used as first table index.
        actions: Sequence of actions; positions are used as action index.
        STACT: ``"st_act"`` for a table indexed as ``policy[state, action]``,
            ``"act"`` for a table indexed as ``policy[action]``. Any other
            value leaves the table untouched.
        ALPHA: Weight applied to ``next_max`` inside the target.
        GAMMA: Weight of the target; ``1 - GAMMA`` weights the old entry.

    Returns:
        The ``resources`` object that was passed in (modified in place).

    Raises:
        ValueError: If a state or action is not contained in ``states`` /
            ``actions`` (raised by ``index``).
    """
    for resource in resources:
        resource.state = resource.units[0].state.copy()  # update resource state

        # Identity check: a resource without a recorded action has nothing
        # to learn from in this step.
        if resource.last_action is None:
            continue

        s1 = states.index(resource.state)        # current state
        s0 = states.index(resource.prev_state)   # previous state
        a = actions.index(resource.last_action)  # taken action

        if STACT == "st_act":
            next_max = np.max(resource.policy[s1])  # max q-value of current state
            q_old = resource.policy[s0, a]
            q_new = (1 - GAMMA) * q_old + GAMMA * (resource.reward + ALPHA * next_max)
            resource.policy[s0, a] = q_new
        elif STACT == "act":
            # Action-only table: the entry of the taken action is used both
            # as the old value and as the follow-up value.
            next_max = resource.policy[a]
            q_old = resource.policy[a]
            q_new = (1 - GAMMA) * q_old + GAMMA * (resource.reward + ALPHA * next_max)
            resource.policy[a] = q_new

        resource.reward = 0  # reset reward once it has been consumed
    return resources
def heuristic_best_job(tau, LV, GV, N):
    """Total ``tau`` over its middle index for every (i, j) combination.

    Args:
        tau: Nested indexable accessed as ``tau[j][q][i]``.
        LV: Number of values for the last index ``i`` (outer result keys).
        GV: Number of values for the middle index ``q`` that are summed.
        N: Number of values for the first index ``j`` (inner result keys).

    Returns:
        A dict of dicts where ``result[i][j]`` equals the sum of
        ``tau[j][q][i]`` for ``q`` in ``range(GV)``.
    """
    return {
        i: {j: sum(tau[j][q][i] for q in range(GV)) for j in range(N)}
        for i in range(LV)
    }
def heuristic_best_resource(heur_j):
    """Swap the two key levels of a nested dict.

    Args:
        heur_j: Dict of dicts addressed as ``heur_j[r][j]``. It must contain
            the key ``0``; the inner keys of that entry define the outer keys
            of the result.

    Returns:
        A dict of dicts where ``result[j][r] == heur_j[r][j]``.

    Raises:
        KeyError: If ``heur_j`` has no key ``0`` or an inner dict lacks one of
            the keys found in ``heur_j[0]``.
    """
    reference_keys = heur_j[0]
    return {
        j: {r: inner[j] for r, inner in heur_j.items()}
        for j in reference_keys
    }
def heuristic_order(delta, LV, GV, N):
    """Build a pairwise blocking score for every resource and job pair.

    For each resource index ``i``, job ``j`` and every other job ``o`` the
    function walks ``q`` over ``range(GV - 1)`` and looks at the difference
    ``delta[j][q + 1][i] - delta[o][q][i]``. Negative differences are added
    to a running slack value; positive differences are either absorbed by the
    slack or added (minus the slack) to the score of the pair.

    Args:
        delta: Nested indexable accessed as ``delta[job][q][resource]``.
        LV: Number of resource indices (outer result keys).
        GV: Upper bound of the middle index; ``GV - 1`` steps are compared.
        N: Number of jobs.

    Returns:
        A dict ``result[i][j][o]`` holding the accumulated score for each
        resource ``i``, job ``j`` and other job ``o`` (``o != j``).
    """
    heur_order = {}  # key = resource i
    for res in range(LV):
        per_job = {}  # key = job j
        for job in range(N):
            per_other = {}  # key = job o
            for rival in range(N):
                if rival == job:
                    continue  # a job is never compared with itself
                blocked_total = 0
                slack = 0
                for step in range(1, GV):
                    gap = delta[job][step][res] - delta[rival][step - 1][res]
                    if gap < 0:
                        slack += gap
                    elif gap > 0:
                        # NOTE(review): slack only ever receives negative
                        # gaps, so it never exceeds 0 and this first branch
                        # looks unreachable -- confirm the intended sign.
                        if slack >= gap:
                            slack -= gap
                        else:
                            blocked_total += gap - slack
                per_other[rival] = blocked_total
            per_job[job] = per_other
        heur_order[res] = per_job
    return heur_order