Commit

bugfix
WemelsfelderML committed Apr 16, 2020
1 parent 9244b3f commit fd77219
Showing 5 changed files with 52 additions and 54 deletions.
6 changes: 4 additions & 2 deletions MILP.py
@@ -35,8 +35,10 @@ def MILP_solve(M, LV, GV, N):
print('Warning: MILP and schedule simulation give different objective'
' values')

# print(schedule)
# print(objVal)
(s, p, delta, gamma, WT, fMax, FMax, f) = decVars
print(s[0])
print(f[0])

return schedule, objVal


2 changes: 1 addition & 1 deletion README.md
@@ -1,3 +1,3 @@
# RL_scheduling

The scheduling algorithm can be executed by running main.py. Settings can be changed in settings.py. This includes the choice between using Q-learning and JEPS, and deciding the number of resources and jobs.
The scheduling algorithm can be executed by running main.py; parameters are set in the main() function of this file. This includes the choice between using Q-learning and JEPS, and the number of resources and jobs.
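
For a concrete picture of what setting parameters in main() looks like, the minimal sketch below mirrors the values this commit uses in main.py; the grouping and comments are illustrative, not an exact copy of the file. After editing them, the experiment is started with python main.py.

    M = 1             # number of work stations
    LV = 3            # number of resources
    GV = 2            # number of units per resource
    N = 7             # number of jobs

    ALPHA = 0.2       # learning rate (0 < alpha <= 1)
    GAMMA = 0.7       # discount factor (0 <= gamma <= 1)
    EPSILON = 0.4     # probability of choosing a random (exploring) action

    METHOD = "JEPS"   # learning method; "JEPS" here, Q-learning is the alternative
    STACT = "act"     # "st_act" = state-action pairs, "act" = actions only

    EPOCHS = 10000    # number of training epochs
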
29 changes: 17 additions & 12 deletions RL.py
@@ -1,8 +1,7 @@
import numpy as np
import random
from itertools import chain, combinations
from settings import *
from tester import *
from main import *

def update_policy_Q(resources, states, actions, STACT):
for resource in resources:
@@ -66,6 +65,7 @@ def __init__(self, i, q):
def reset(self, waiting):
self.processing = None # job that is being processed
self.c = None # completion time of current job
self.c_idle = None # time of becoming idle after job completion

# state of unit = jobs waiting to be processed on unit
if self.q == 0:
Expand Down Expand Up @@ -109,7 +109,7 @@ def reset(self):
self.DONE = False

# TAKE A TIMESTEP
def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order):
def step(self, z, GV, N, METHOD, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order):

for resource in self.resources:
resource.reward -= 1 # timestep penalty
@@ -120,14 +120,16 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
unit = resource.units[q]
if unit.c == z:
job = unit.processing # remember what job it was processing
if q == (GV-1):
job.done = True # set job to done
job.c = z # save completion time of job

if unit.c_idle == z:
job = unit.processing
unit.processing = None # set unit to idle

if q < (GV-1): # if this is not the last
nxt = resource.units[q+1] # next unit in the resource
nxt.state.append(job) # add job to waiting list for next unit
else:
job.done = True # set job to done
job.c = z # save completion time of job

# CHECK WHETHER ALL JOBS ARE FINISHED
if all([job.done for job in self.jobs]):
@@ -144,8 +146,10 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
if unit.processing == None: # check if unit is currently idle
job = unit.state.pop(0) # pick first waiting job
unit.processing = job # set unit to processing selected job
duration = delta[job.j][unit.q][resource.i]
unit.c = z + duration # set completion time
completion = z + delta[job.j][unit.q][resource.i]
unit.c = completion # set completion time
unit.c_idle = completion + 1
resource.schedule.append((job.j,z))

# START PROCESSING OF NEW JOBS
first_units = set([resource.units[0] for resource in self.resources])
@@ -192,8 +196,9 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
unit.state.remove(job) # remove job from all waiting lists
unit.processing = job # set unit to processing job
job.t = z
duration = delta[job.j][unit.q][resource.i]
unit.c = z + duration # set completion time on unit
completion = z + delta[job.j][unit.q][resource.i]
unit.c = completion # set completion time on unit
unit.c_idle = completion + 1
resource.schedule.append((job.j,z)) # add to schedule
else:
resource.last_action = None
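
The central change in RL.py separates two events that were previously merged: a job finishes processing on a unit at unit.c, but the unit only becomes idle, and the job only moves on to the next unit's waiting list, one timestep later, at unit.c_idle = unit.c + 1. The minimal, self-contained sketch below illustrates that timing; Job and Unit here are simplified stand-ins, not the repository's actual classes.

    class Job:
        def __init__(self, j):
            self.j = j
            self.done = False
            self.c = None                      # completion time of the job

    class Unit:
        def __init__(self, q, is_last):
            self.q = q
            self.is_last = is_last
            self.processing = None             # job currently being processed
            self.c = None                      # time at which the current job completes
            self.c_idle = None                 # time at which the unit becomes idle again

        def start(self, job, z, duration):
            self.processing = job
            self.c = z + duration              # job completes here ...
            self.c_idle = self.c + 1           # ... but the unit only frees up one step later

        def tick(self, z):
            if self.c == z and self.is_last:
                self.processing.done = True    # last unit in the resource: job is finished
                self.processing.c = z
            if self.c_idle == z:
                job, self.processing = self.processing, None   # unit becomes idle
                return job                     # caller appends this to the next unit's queue
            return None

    # Tiny usage example: one job on a single (last) unit, duration 2, started at z = 0.
    job, unit = Job(0), Unit(0, is_last=True)
    unit.start(job, z=0, duration=2)
    for z in range(4):
        unit.tick(z)                           # job.done becomes True at z = 2, unit idle at z = 3
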
56 changes: 30 additions & 26 deletions main.py
@@ -78,7 +78,7 @@ def calculate_reward(RL):

return Cmax

def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA):
def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA, STACT):
for z in range(time_max-1):
s = resource.h[z][0]
a = resource.h[z][1]
@@ -99,33 +99,39 @@ def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA):
return resource

def make_schedule(RL):
schedule = dict()
schedule = []
for resource in RL.resources:
schedule[resource.i] = resource.schedule
schedule.append(resource.schedule)
return schedule

def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res, heur_order, EPOCHS, METHOD, STACT):
def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, EPOCHS, METHOD, STACT):

# Generate heuristics for Q_learning rewards
heur_job = heuristic_best_job(delta, LV, GV, N)
heur_res = heuristic_best_resource(heur_job)
heur_order = heuristic_order(delta, LV, GV, N)

if STACT == "st_act": # st_act for state-action pairs, act for only actions
policy_init = np.zeros([2**N, N+1]) # states, actions
if STACT == "act": # st_act for state-action pairs, act for only actions
policy_init = np.zeros([N+1]) # actions

RL = MDP(LV, GV, N, policy_init) # initialize MDP
r_best = 99999
best_schedule = dict()
best_schedule = []
epoch_best_found = 0
timer_start = time.time()
for epoch in range(EPOCHS):
if epoch%100==0:
print(epoch)
# if epoch%100==0:
# print(epoch)

DONE = False
z = 0
RL.reset()

# take timesteps until processing of all jobs is finished
while not DONE:
RL, DONE = RL.step(z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order)
RL, DONE = RL.step(z, GV, N, METHOD, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order)
z += 1

r = calculate_reward(RL)
@@ -140,7 +146,7 @@ def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res
actions = RL.actions

for i in range(len(resources)):
resource = update_policy_JEPS(resources[i], states, actions, r_best, z, GAMMA)
resource = update_policy_JEPS(resources[i], states, actions, r_best, z, GAMMA, STACT)
RL.resources[i] = resource

timer_finish = time.time()
@@ -154,41 +160,39 @@ def write_log(OUTPUT_DIR, METHOD, STACT, N, LV, GV, EPOCHS, ALPHA, GAMMA, EPSILO

def main():
M = 1 # number of work stations
LV = 4 # number of resources
GV = 3 # number of units per resource
N = 10 # number of jobs
LV = 3 # number of resources
GV = 2 # number of units per resource
N = 7 # number of jobs

ALPHA = 0.2 # learning rate (0<α≤1): the extent to which Q-values are updated every timestep
GAMMA = 0.7 # discount factor (0≤γ≤1): how much importance to give to future rewards (1 = long term, 0 = greedy)
EPSILON = 0.2 # probability of choosing a random action (= exploring)
EPSILON = 0.4 # probability of choosing a random action (= exploring)

METHOD = "JEPS"
STACT = "act"

EPOCHS = 1000 # set number of epochs to train RL model
EPOCHS = 10000 # set number of epochs to train RL model
OUTPUT_DIR = '../output/'

file = open(OUTPUT_DIR+"log.csv",'a')
file.write("METHOD,STACT,N,LV,GV,EPOCHS,ALPHA,GAMMA,EPSILON,MAKESPAN,TIME,EPOCH_BEST")
file.close()

print("START TESTING")
# for LV in range(1,10): # number of resources
# for GV in range(1,5): # number of units per resource
# for N in range(1,100): # number of jobs
# for N in range(1,25):
# for LV in range(1,11):
# for GV in range(1,11):
# for EPOCHS in range(10001,500):

ins = MILP_instance(M, LV, GV, N)
# best_schedule, best_makespan = MILP_solve(M, LV, GV, N)
# print(best_schedule, best_makespan)
best_schedule, best_makespan = MILP_solve(M, LV, GV, N)
print(best_schedule, best_makespan)

delta = np.round(ins.lAreaInstances[0].tau)
print(delta)

heur_job = heuristic_best_job(delta, LV, GV, N)
heur_res = heuristic_best_resource(heur_job)
heur_order = heuristic_order(delta, LV, GV, N)

# print(str(LV)+","+str(GV)+","+str(N)+","+str(EPSILON)+","+str(GAMMA))
print("N: "+str(N)+", LV: "+str(LV)+", EPSILON: "+str(EPSILON)+", GAMMA: "+str(GAMMA))

makespan, schedule, epoch, calc_time, RL = find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res, heur_order, EPOCHS, METHOD, STACT)
makespan, schedule, epoch, calc_time, RL = find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, EPOCHS, METHOD, STACT)
print(schedule)
print(makespan)
print(calc_time)
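
One detail of find_schedule above that is easy to miss is how STACT changes the shape of the initial policy: "st_act" allocates one row per possible set of waiting jobs (2**N states, matching the job-subset states used in RL.py), while "act" keeps only a single action vector. A small self-contained sketch, using numpy and N = 7 as in this commit's main(); reading the N + 1 entries as one per job plus an idle action is an interpretation, not something stated in the diff.

    import numpy as np

    N = 7                                       # number of jobs, as set in main()
    STACT = "act"

    if STACT == "st_act":                       # state-action pairs
        policy_init = np.zeros([2**N, N + 1])   # one row per subset of waiting jobs: shape (128, 8)
    elif STACT == "act":                        # actions only
        policy_init = np.zeros([N + 1])         # a single action vector: shape (8,)

    print(policy_init.shape)
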
13 changes: 0 additions & 13 deletions settings.py

This file was deleted.
