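Tune training hyperparameters, replace the hard-coded confidence cutoff with a CONFIDENCE_THRESHOLD constant, and remove debug prints.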

dtr0x · dtr0x · commit 53010222fbdb · 2019-11-07T12:12:23.000-05:00
diff --git a/extract_coco_data.py b/extract_coco_data.py
@@ -46,7 +46,6 @@
         img_score = img_out[target].item()
         img_gt_score = img_gt_out[target].item()
         if img_score < IMG_THRESH and img_gt_score >= OBJ_THRESH:
-            print(target, img_out, img_gt_out)
             if is_car:
                 car_imgs += 1
                 img.save("coco_voc_images/car/{}.jpg".format(ann['id']))
diff --git a/extract_voc_data.py b/extract_voc_data.py
@@ -46,7 +46,6 @@ def parse_data(year):
             img_score = img_out[target].item()
             img_gt_score = img_gt_out[target].item()
             if img_score < IMG_THRESH and img_gt_score >= OBJ_THRESH:
-                print(target, img_out, img_gt_out)
                 if is_car:
                     car_imgs += 1
                     img.save("coco_voc_images/car/{}".format(filename))
diff --git a/reinforcement.py b/reinforcement.py
@@ -5,6 +5,8 @@
 from numpy import argmax
 from classifier.ResNet import ResNet
 
+CONFIDENCE_THRESHOLD = 0.8
+
 # load the pre-trained classifier (trained on imagenet)
 classifier = ResNet().to(device)
 classifier.load_state_dict(torch.load("classifier/init_model.pth"))
@@ -76,7 +78,7 @@ def take_action(state, action):
     conf_new = calculate_conf(next_state)
        
     if done:
-        if conf_new >= 0.9:
+        if conf_new >= CONFIDENCE_THRESHOLD:
             reward = 3.0
         else:
             reward = -3.0
@@ -95,12 +97,13 @@ def find_positive_actions(state):
 
 def find_best_action(state):
     confs = []
-    if calculate_conf(state) >= 0.9:
+    if calculate_conf(state) >= CONFIDENCE_THRESHOLD:
         return 8
     for i in range(8):
         reward, next_state, done = take_action(state, i)
         confs.append(calculate_conf(next_state))
     best_next_state_conf = argmax(confs)
+    #print([a.item() for a in confs])
     if calculate_conf(state) > confs[best_next_state_conf]:
         return None
     return best_next_state_conf
diff --git a/train.py b/train.py
@@ -7,13 +7,12 @@
 MODEL_PATH = "models"
 
 # Hyperparameters / utilities
-BATCH_SIZE = 5
-NUM_EPOCHS = 40
+BATCH_SIZE = 10
+NUM_EPOCHS = 100
 GAMMA = 0.995
-EPS_START = 0.9
+EPS_START = 1
 EPS_END = 0.1
-EPS_LEN = 20 # number of epochs to decay epsilon
-TARGET_UPDATE = 10
+EPS_LEN = 25 # number of epochs to decay epsilon
 
 eps_sched = np.linspace(EPS_START, EPS_END, EPS_LEN)
 
@@ -40,7 +39,6 @@ def select_action(states, eps):
                 action = random.choice(positive_actions)
             else:
                 action = random.randrange(9)
-            #action = random.randrange(9)
             actions.append(action)
         actions = torch.tensor(actions, device=device)
         print("random:", actions)
@@ -68,7 +66,7 @@ def select_action(states, eps):
         batch_steps = 0
         start = time.time()
         # perform actions on batch items until done
-        while len(states) > 0 and batch_steps < 50:
+        while len(states) > 0 and batch_steps < 40:
             actions = select_action(states, eps)
             states_new = []
             # store state transition for each each (state, action) pair