mattcamp · mharvan · Aug 3, 2020 · Aug 6, 2020 · adam-aph · Aug 6, 2020
diff --git a/upload-current.sh b/upload-current.sh
@@ -1,77 +1,111 @@
 #!/usr/bin/env bash
+#
+# Upload a trained model to S3 in a format compatible with the DeepRacer model
+# import functionality.
+#
+# Usage:
+#     ./upload-current.sh <s3-bucket> <s3-prefix> [-c <checkpoint>] [-b]
+#
+#   -c <checkpoint>  use the named checkpoint instead of the latest one
+#   -b               use the best checkpoint instead of the latest one
+#                    (ignored when -c is given)
+#
+# Example usage to upload the best model:
+#     ./upload-current.sh aws-deepracer-XXX model1 -b
+#
 
+# Both positional arguments are required: the destination prefix is wiped
+# before the upload, so an empty bucket or prefix must never get that far.
+if [ -z "$1" ] || [ -z "$2" ]; then
+  echo "Usage: $0 <s3-bucket> <s3-prefix> [-c <checkpoint>] [-b]" >&2
+  exit 1
+fi
+
 S3_BUCKET=$1
 S3_PREFIX=$2
 
 MODEL_DIR=data/minio/bucket/current/model/
 
-while getopts ":c:" opt; do
+echo "Uploading to model ==>  s3://$S3_BUCKET/$S3_PREFIX  <=="
+
+USE_BEST=false
+# getopts stops at the first non-option word, so starting at $1 (the bucket)
+# it would never see -c or -b. Begin parsing after the two positionals.
+OPTIND=3
+while getopts ":c:b" opt; do
 case $opt in
-c) CHECKPOINT="$OPTARG"
-;;
-\?) echo "Invalid option -$OPTARG" >&2
-;;
+    c) CHECKPOINT="$OPTARG"
+       ;;
+    b) USE_BEST=true
+       ;;
+    :) echo "Option -$OPTARG requires an argument" >&2
+       exit 1
+       ;;
+    \?) echo "Invalid option -$OPTARG" >&2
+        ;;
 esac
 done
 
 CHECKPOINT_FILE=$MODEL_DIR"deepracer_checkpoints.json"
 if [ ! -f ${CHECKPOINT_FILE} ]; then
   echo "Checkpoint file not found!"
   exit 1
-else
-  echo "found checkpoint index file "$CHECKPOINT_FILE
-fi;
+fi
+echo "found checkpoint index file $CHECKPOINT_FILE"
 
 if [ -z "$CHECKPOINT" ]; then
-  echo "Checkpoint not supplied, checking for latest checkpoint"
-
-  LAST_CHECKPOINT=`cat $CHECKPOINT_FILE |jq ".last_checkpoint.name"`
-  BEST_CHECKPOINT=`cat $CHECKPOINT_FILE |jq ".best_checkpoint.name"`
-
-  CHECKPOINT=$LAST_CHECKPOINT
-
-  echo "latest checkpoint = "$CHECKPOINT
+  # No -c given: read the names from the index; jq -r prints them without quotes.
+  LAST_CHECKPOINT=$(jq -r ".last_checkpoint.name" "$CHECKPOINT_FILE")
+  BEST_CHECKPOINT=$(jq -r ".best_checkpoint.name" "$CHECKPOINT_FILE")
+  if $USE_BEST; then
+     CHECKPOINT=$BEST_CHECKPOINT
+     echo "Using best checkpoint ==>  $CHECKPOINT  <=="
+  else
+     CHECKPOINT=$LAST_CHECKPOINT
+     echo "Using latest checkpoint ==>  $CHECKPOINT  <=="
+  fi
 else
   echo "Checkpoint supplied: ["${CHECKPOINT}"]"
 fi
 
 MODEL=`echo $CHECKPOINT |sed "s@^[^0-9]*\([0-9]\+\).*@\1@"`
-mkdir -p checkpoint
+rm -rf checkpoint   # drop any staging directory left over from a previous run
+cp -a upload-template checkpoint || exit 1   # seed with the template files; abort if they cannot be copied
+mkdir -p checkpoint/model
 MODEL_FILE=$MODEL_DIR"model_"$MODEL".pb"
 METADATA_FILE=$MODEL_DIR"model_metadata.json"
 
-
 if test ! -f "$MODEL_FILE"; then
     echo "$MODEL_FILE doesn't exist"
     exit 1
-else
-  cp $MODEL_FILE checkpoint/  
 fi
 
 if test ! -f "$METADATA_FILE"; then
     echo "$METADATA_FILE doesn't exist"
     exit 1
-else
-  cp $METADATA_FILE checkpoint/  
 fi
 
-CHECKPOINT_FILES=`echo $CHECKPOINT* |sed "s/\"//g"`
-for i in $( find $MODEL_DIR -type f -name $CHECKPOINT_FILES ); do
-  cp $i checkpoint/  
+cp -v "$MODEL_FILE" checkpoint/model/
+cp -v "$METADATA_FILE" checkpoint/model/
+
+# Copy every file whose name starts with the checkpoint name (MODEL_DIR ends in "/").
+for i in "$MODEL_DIR${CHECKPOINT}"*
+do
+  # An unmatched glob stays literal and cp fails: stop before the S3 prefix is wiped.
+  cp -v "$i" checkpoint/model/ || exit 1
 done
 
-VAR1=`cat $CHECKPOINT_FILE |jq ".last_checkpoint = .best_checkpoint"`
-VAR2=`echo $VAR1 |jq ".last_checkpoint.name = $CHECKPOINT"`
-VAR3=`echo $VAR2 |jq ".best_checkpoint.name = $CHECKPOINT"`
-echo $VAR3 >checkpoint/deepracer_checkpoints.json
+echo "$CHECKPOINT" > checkpoint/model/.coach_checkpoint
+# File deepracer_checkpoints.json is optional.
 
-# upload files to s3
-for filename in checkpoint/*; do
-  aws s3 cp $filename s3://$S3_BUCKET/$S3_PREFIX/model/
-done
+# Clean up the upload destination so no files from an earlier upload remain
+aws s3 rm --recursive "s3://$S3_BUCKET/$S3_PREFIX/"
 
-tar -czvf ${CHECKPOINT}-checkpoint.tar.gz checkpoint/*
+# Upload files to S3; on failure keep the staging dir and do not report success
+aws s3 sync checkpoint/ "s3://$S3_BUCKET/$S3_PREFIX/" || exit 1
 
+# Back up the uploaded checkpoint as a local tarball
+tar -czvf "${CHECKPOINT}.tar.gz" checkpoint
 rm -rf checkpoint
-echo 'done uploading model!'
 
+echo 'done uploading model!'
diff --git a/upload-template/ip/hyperparameters.json b/upload-template/ip/hyperparameters.json
@@ -0,0 +1,15 @@
+{
+  "batch_size": 64,
+  "beta_entropy": 0.01,
+  "discount_factor": 0.999,
+  "e_greedy_value": 1.0,
+  "epsilon_steps": 10000,
+  "exploration_type": "categorical",
+  "loss_type": "huber",
+  "lr": 0.0003,
+  "num_episodes_between_training": 20,
+  "num_epochs": 10,
+  "stack_size": 1,
+  "term_cond_avg_score": 100000.0,
+  "term_cond_max_episodes": 100000
+}
diff --git a/upload-template/reward_function.py b/upload-template/reward_function.py
@@ -0,0 +1,45 @@
+def reward_function(params):
+    '''
+    Example reward for staying inside the two track borders (reward_lane) and for
+    keeping distance from the next object in the agent's lane (reward_avoid, weight 4).
+    '''
+
+    all_wheels_on_track = params['all_wheels_on_track']
+    distance_from_center = params['distance_from_center']
+    track_width = params['track_width']
+    objects_distance = params['objects_distance']
+    _, next_object_index = params['closest_objects']  # first element of the pair is discarded
+    objects_left_of_center = params['objects_left_of_center']
+    is_left_of_center = params['is_left_of_center']
+
+    # Initialize reward with a small number but not zero
+    # because zero means off-track or crashed
+    reward = 1e-3
+
+    # Reward if all wheels are on track and the agent keeps a 0.05 margin to the half-width
+    if all_wheels_on_track and (0.5 * track_width - distance_from_center) >= 0.05:
+        reward_lane = 1.0
+    else:
+        reward_lane = 1e-3
+
+    # Start from the full avoidance reward; it is reduced below if the next object is too close
+    reward_avoid = 1.0
+
+    # Distance to the next object
+    distance_closest_object = objects_distance[next_object_index]
+    # Decide if the agent and the next object are in the same lane (same side of center)
+    is_same_lane = objects_left_of_center[next_object_index] == is_left_of_center
+
+    if is_same_lane:  # distances of 0.8 or more leave reward_avoid at 1.0
+        if 0.5 <= distance_closest_object < 0.8: 
+            reward_avoid *= 0.5
+        elif 0.3 <= distance_closest_object < 0.5:
+            reward_avoid *= 0.2
+        elif distance_closest_object < 0.3:
+            reward_avoid = 1e-3 # Likely crashed
+
+    # Calculate reward by putting different weights on
+    # the two aspects above (added on top of the initial 1e-3)
+    reward += 1.0 * reward_lane + 4.0 * reward_avoid
+
+    return reward