Commit

bugfix
WemelsfelderML committed Apr 16, 2020
1 parent 9244b3f commit fd77219
Showing 5 changed files with 52 additions and 54 deletions.
6 changes: 4 additions & 2 deletions MILP.py
@@ -35,8 +35,10 @@ def MILP_solve(M, LV, GV, N):
print('Warning: MILP and schedule simulation give different objective'
' values')

# print(schedule)
# print(objVal)
(s, p, delta, gamma, WT, fMax, FMax, f) = decVars
print(s[0])
print(f[0])

return schedule, objVal


2 changes: 1 addition & 1 deletion README.md
@@ -1,3 +1,3 @@
# RL_scheduling

The scheduling algorithm can be executed by running main.py. Settings can be changed in settings.py. This includes the choice between using Q-learning and JEPS, and deciding the number of resources and jobs.
The scheduling algorithm can be executed by running main.py; parameters are set in the main() function of this file. This includes the choice between using Q-learning and JEPS, and the number of resources and jobs.
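
For a concrete picture of what setting parameters in main() looks like, the minimal sketch below mirrors the values this commit uses in main.py; the grouping and comments are illustrative, not an exact copy of the file. After editing them, the experiment is started with python main.py.

    M = 1             # number of work stations
    LV = 3            # number of resources
    GV = 2            # number of units per resource
    N = 7             # number of jobs

    ALPHA = 0.2       # learning rate (0 < alpha <= 1)
    GAMMA = 0.7       # discount factor (0 <= gamma <= 1)
    EPSILON = 0.4     # probability of choosing a random (exploring) action

    METHOD = "JEPS"   # learning method; "JEPS" here, Q-learning is the alternative
    STACT = "act"     # "st_act" = state-action pairs, "act" = actions only

    EPOCHS = 10000    # number of training epochs
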
29 changes: 17 additions & 12 deletions RL.py
@@ -1,8 +1,7 @@
import numpy as np
import random
from itertools import chain, combinations
from settings import *
from tester import *
from main import *

def update_policy_Q(resources, states, actions, STACT):
for resource in resources:
@@ -66,6 +65,7 @@ def __init__(self, i, q):
def reset(self, waiting):
self.processing = None # job that is being processed
self.c = None # completion time of current job
self.c_idle = None # time of becoming idle after job completion

# state of unit = jobs waiting to be processed on unit
if self.q == 0:
Expand Down Expand Up @@ -109,7 +109,7 @@ def reset(self):
self.DONE = False

# TAKE A TIMESTEP
def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order):
def step(self, z, GV, N, METHOD, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order):

for resource in self.resources:
resource.reward -= 1 # timestep penalty
@@ -120,14 +120,16 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
unit = resource.units[q]
if unit.c == z:
job = unit.processing # remember what job it was processing
if q == (GV-1):
job.done = True # set job to done
job.c = z # save completion time of job

if unit.c_idle == z:
job = unit.processing
unit.processing = None # set unit to idle

if q < (GV-1): # if this is not the last
nxt = resource.units[q+1] # next unit in the resource
nxt.state.append(job) # add job to waiting list for next unit
else:
job.done = True # set job to done
job.c = z # save completion time of job

# CHECK WHETHER ALL JOBS ARE FINISHED
if all([job.done for job in self.jobs]):
@@ -144,8 +146,10 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
if unit.processing == None: # check if unit is currently idle
job = unit.state.pop(0) # pick first waiting job
unit.processing = job # set unit to processing selected job
duration = delta[job.j][unit.q][resource.i]
unit.c = z + duration # set completion time
completion = z + delta[job.j][unit.q][resource.i]
unit.c = completion # set completion time
unit.c_idle = completion + 1
resource.schedule.append((job.j,z))

# START PROCESSING OF NEW JOBS
first_units = set([resource.units[0] for resource in self.resources])
@@ -192,8 +196,9 @@ def step(self, z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res
unit.state.remove(job) # remove job from all waiting lists
unit.processing = job # set unit to processing job
job.t = z
duration = delta[job.j][unit.q][resource.i]
unit.c = z + duration # set completion time on unit
completion = z + delta[job.j][unit.q][resource.i]
unit.c = completion # set completion time on unit
unit.c_idle = completion + 1
resource.schedule.append((job.j,z)) # add to schedule
else:
resource.last_action = None
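
The central change in RL.py separates two events that were previously merged: a job finishes processing on a unit at unit.c, but the unit only becomes idle, and the job only moves on to the next unit's waiting list, one timestep later, at unit.c_idle = unit.c + 1. The minimal, self-contained sketch below illustrates that timing; Job and Unit here are simplified stand-ins, not the repository's actual classes.

    class Job:
        def __init__(self, j):
            self.j = j
            self.done = False
            self.c = None                      # completion time of the job

    class Unit:
        def __init__(self, q, is_last):
            self.q = q
            self.is_last = is_last
            self.processing = None             # job currently being processed
            self.c = None                      # time at which the current job completes
            self.c_idle = None                 # time at which the unit becomes idle again

        def start(self, job, z, duration):
            self.processing = job
            self.c = z + duration              # job completes here ...
            self.c_idle = self.c + 1           # ... but the unit only frees up one step later

        def tick(self, z):
            if self.c == z and self.is_last:
                self.processing.done = True    # last unit in the resource: job is finished
                self.processing.c = z
            if self.c_idle == z:
                job, self.processing = self.processing, None   # unit becomes idle
                return job                     # caller appends this to the next unit's queue
            return None

    # Tiny usage example: one job on a single (last) unit, duration 2, started at z = 0.
    job, unit = Job(0), Unit(0, is_last=True)
    unit.start(job, z=0, duration=2)
    for z in range(4):
        unit.tick(z)                           # job.done becomes True at z = 2, unit idle at z = 3
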
56 changes: 30 additions & 26 deletions main.py
@@ -78,7 +78,7 @@ def calculate_reward(RL):

return Cmax

def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA):
def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA, STACT):
for z in range(time_max-1):
s = resource.h[z][0]
a = resource.h[z][1]
@@ -99,33 +99,39 @@ def update_policy_JEPS(resource, states, actions, r_best, time_max, GAMMA):
return resource

def make_schedule(RL):
schedule = dict()
schedule = []
for resource in RL.resources:
schedule[resource.i] = resource.schedule
schedule.append(resource.schedule)
return schedule

def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res, heur_order, EPOCHS, METHOD, STACT):
def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, EPOCHS, METHOD, STACT):

# Generate heuristics for Q_learning rewards
heur_job = heuristic_best_job(delta, LV, GV, N)
heur_res = heuristic_best_resource(heur_job)
heur_order = heuristic_order(delta, LV, GV, N)

if STACT == "st_act": # st_act for state-action pairs, act for only actions
policy_init = np.zeros([2**N, N+1]) # states, actions
if STACT == "act": # st_act for state-action pairs, act for only actions
policy_init = np.zeros([N+1]) # actions

RL = MDP(LV, GV, N, policy_init) # initialize MDP
r_best = 99999
best_schedule = dict()
best_schedule = []
epoch_best_found = 0
timer_start = time.time()
for epoch in range(EPOCHS):
if epoch%100==0:
print(epoch)
# if epoch%100==0:
# print(epoch)

DONE = False
z = 0
RL.reset()

# take timesteps until processing of all jobs is finished
while not DONE:
RL, DONE = RL.step(z, GV, N, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order)
RL, DONE = RL.step(z, GV, N, METHOD, delta, ALPHA, GAMMA, EPSILON, STACT, heur_job, heur_res, heur_order)
z += 1

r = calculate_reward(RL)
@@ -140,7 +146,7 @@ def find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res
actions = RL.actions

for i in range(len(resources)):
resource = update_policy_JEPS(resources[i], states, actions, r_best, z, GAMMA)
resource = update_policy_JEPS(resources[i], states, actions, r_best, z, GAMMA, STACT)
RL.resources[i] = resource

timer_finish = time.time()
@@ -154,41 +160,39 @@ def write_log(OUTPUT_DIR, METHOD, STACT, N, LV, GV, EPOCHS, ALPHA, GAMMA, EPSILO

def main():
M = 1 # number of work stations
LV = 4 # number of resources
GV = 3 # number of units per resource
N = 10 # number of jobs
LV = 3 # number of resources
GV = 2 # number of units per resource
N = 7 # number of jobs

ALPHA = 0.2 # learning rate (0<α≤1): the extent to which Q-values are updated every timestep
GAMMA = 0.7 # discount factor (0≤γ≤1): how much importance to give to future rewards (1 = long term, 0 = greedy)
EPSILON = 0.2 # probability of choosing a random action (= exploring)
EPSILON = 0.4 # probability of choosing a random action (= exploring)

METHOD = "JEPS"
STACT = "act"

EPOCHS = 1000 # set number of epochs to train RL model
EPOCHS = 10000 # set number of epochs to train RL model
OUTPUT_DIR = '../output/'

file = open(OUTPUT_DIR+"log.csv",'a')
file.write("METHOD,STACT,N,LV,GV,EPOCHS,ALPHA,GAMMA,EPSILON,MAKESPAN,TIME,EPOCH_BEST")
file.close()

print("START TESTING")
# for LV in range(1,10): # number of resources
# for GV in range(1,5): # number of units per resource
# for N in range(1,100): # number of jobs
# for N in range(1,25):
# for LV in range(1,11):
# for GV in range(1,11):
# for EPOCHS in range(10001,500):

ins = MILP_instance(M, LV, GV, N)
# best_schedule, best_makespan = MILP_solve(M, LV, GV, N)
# print(best_schedule, best_makespan)
best_schedule, best_makespan = MILP_solve(M, LV, GV, N)
print(best_schedule, best_makespan)

delta = np.round(ins.lAreaInstances[0].tau)
print(delta)

heur_job = heuristic_best_job(delta, LV, GV, N)
heur_res = heuristic_best_resource(heur_job)
heur_order = heuristic_order(delta, LV, GV, N)

# print(str(LV)+","+str(GV)+","+str(N)+","+str(EPSILON)+","+str(GAMMA))
print("N: "+str(N)+", LV: "+str(LV)+", EPSILON: "+str(EPSILON)+", GAMMA: "+str(GAMMA))

makespan, schedule, epoch, calc_time, RL = find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, heur_job, heur_res, heur_order, EPOCHS, METHOD, STACT)
makespan, schedule, epoch, calc_time, RL = find_schedule(M, LV, GV, N, delta, ALPHA, GAMMA, EPSILON, EPOCHS, METHOD, STACT)
print(schedule)
print(makespan)
print(calc_time)
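
One detail of find_schedule above that is easy to miss is how STACT changes the shape of the initial policy: "st_act" allocates one row per possible set of waiting jobs (2**N states, matching the job-subset states used in RL.py), while "act" keeps only a single action vector. A small self-contained sketch, using numpy and N = 7 as in this commit's main(); reading the N + 1 entries as one per job plus an idle action is an interpretation, not something stated in the diff.

    import numpy as np

    N = 7                                       # number of jobs, as set in main()
    STACT = "act"

    if STACT == "st_act":                       # state-action pairs
        policy_init = np.zeros([2**N, N + 1])   # one row per subset of waiting jobs: shape (128, 8)
    elif STACT == "act":                        # actions only
        policy_init = np.zeros([N + 1])         # a single action vector: shape (8,)

    print(policy_init.shape)
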
13 changes: 0 additions & 13 deletions settings.py

This file was deleted.
