Merge pull request #15 from arekmula/finish_drawing
Finish drawing
arekmula authored Jun 1, 2021
2 parents 0e8bfc8 + 4739e7e commit 84ff4ae
Showing 5 changed files with 120 additions and 44 deletions.
23 changes: 17 additions & 6 deletions README.md
@@ -6,9 +6,11 @@ The images from the camera are being sent to the PC, where your hand and its pos
The detected hand movement is then converted to drone steering commands which makes the drone replicate your movement.

## Steering
- Palm -> Drawing
- Fist -> Stop drawing
- Two hands -> Stop Drawing
There are two methods of drawing. The method can be chosen
by providing the `finish_drawing` argument when running the script (see the example below):
- The first allows the user to draw with **any hand gesture**. The drawing is finished by showing **two hands at once**.
- The second allows the user to draw with the **palm gesture**. The drawing is finished by showing a **fist gesture**.

Note that if more than one hand is shown, the drawing is made by the right hand.
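For example (the `finish_drawing` choices are defined in `main.py` below):
```
python3 main.py --finish_drawing "two_hands"
python3 main.py --finish_drawing "fist"
```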

![alt text](pictures/palm.png "PALM GESTURE")
![alt text](pictures/fist.png "FIST GESTURE")
@@ -51,10 +53,17 @@ directory.
To run the Tello Drawer, use the following commands:
- To run with the Tello drone:
```
python3 main.py --image_source "tello" --local_ip "0.0.0.0" --local_port 8889
python3 main.py
```
When running the script, you can set additional parameters:
```
--finish_drawing - Finish-drawing sign ("two_hands" or "fist")
--max_area - The max area [cm] that the drone can use to perform the drawing
--min_length - Minimum length between points, to reduce the number of points from detection
--takeoff_offset - Takeoff move-up offset in cm
```
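A sample invocation combining these parameters (the values here are illustrative):
```
python3 main.py --finish_drawing "fist" --max_area 150 --min_length 10 --takeoff_offset 80
```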

- You can also run the test drawing with your PC built-in camera or video that you recorded earlier.
- You can also run the test drawing with your built-in PC camera or video that you recorded earlier.
```
python3 main.py --image_source "built_camera" --camera_index 0
python3 main.py --image_source "saved_file" --filepath "path/to/file"
@@ -65,7 +74,7 @@ python3 main.py --image_source "saved_file" --filepath "path/to/file"
The dataset saver helps gather data using the Tello drone for further processing.
It connects to the Tello drone, activates the video stream, and saves each received frame.
```
python3 dataset_saver.py --local_ip "0.0.0.0" --local_port 8889 --save_img True
python3 dataset_saver.py --save_img True
```
- Set the FPS with the `--fps` flag
- Set the dataset saving directory with the `--save_dir` flag
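For example, to save frames at 15 FPS into a custom directory (values are illustrative):
```
python3 dataset_saver.py --save_img True --fps 15 --save_dir "hand_dataset"
```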
@@ -80,3 +89,5 @@ We haven't made any changes to the detector.
We had to split the hand detections into 2 separate classes.
The fist is responsible for the start/stop signal, while the palm is responsible for drawing. To do so, we created a
classifier based on a pretrained EfficientNetB0. The database is available [here](https://www.gti.ssr.upm.es/data/HandGesture_database.html)
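As a rough sketch, a transfer-learning classifier of this kind can be assembled in TensorFlow 2.4 roughly as follows (the function name and head layout are illustrative, not the repo's actual `HandClassifier`):
```
import tensorflow as tf

def build_hand_classifier(input_shape=(224, 224, 3), num_classes=2):
    # Pretrained EfficientNetB0 backbone; tf.keras EfficientNet models
    # expect raw RGB pixels in [0, 255], so no manual rescaling is needed
    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False, weights="imagenet", input_shape=input_shape)
    backbone.trainable = False  # train only the classification head at first

    model = tf.keras.Sequential([
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(num_classes, activation="softmax"),  # fist / palm
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model
```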

TODO: Improve accuracy of hand classification in a real environment.
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
opencv-python~=4.5.1.48
tensorflow==2.4.1
tensorflow==2.4.2
numpy~=1.19.5
djitellopy2
13 changes: 5 additions & 8 deletions src/dataset_saver.py
@@ -3,12 +3,13 @@
from argparse import ArgumentParser
from pathlib import Path

from tello import Tello
from djitellopy import Tello


def main(args):
tello = Tello(local_ip=args.local_ip, local_port=args.local_port)

tello = Tello()
tello.connect()
tello.streamon()
# Create directory to save images if it doesn't exist
if args.save_img:
timestamp = str(time.time())
@@ -27,10 +28,8 @@ def main(args):
cv2.destroyAllWindows()
break

img = tello.read()
img = tello.get_frame_read().frame
if img is not None:
# The image received from tello is RGB, OpenCV works in BGR format
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

# Show the image
cv2.imshow("tello", img)
@@ -44,8 +43,6 @@ def main(args):
if __name__ == "__main__":
parser = ArgumentParser()

parser.add_argument("--local_ip", metavar="local_ip", type=str, required=True)
parser.add_argument("--local_port", metavar="local_port", type=int, required=True)
parser.add_argument("--save_img", metavar="save_img", type=bool, default=False)
parser.add_argument("--save_dir", metavar="save_dir", type=str, default="dataset")
parser.add_argument("--fps", metavar="fps", type=int, default=30)
77 changes: 59 additions & 18 deletions src/image_processing/processing.py
@@ -6,24 +6,56 @@


class ImageProcessor:
def __init__(self, enlargebox_px=15, queue_size=20, drawing_state_threshold=0.5,
inactivity_std_dev_threshold=4, activity_std_dev_lower_threshold=15,
TWO_HANDS_FINISH = 0
FIST_FINISH = 1

MINIMUM_QUEUE_SIZE = 5

def __init__(self, finish_drawing_sign, hand_detector_confidence, enlargebox_px=15, predictions_queue_size=20,
drawing_state_threshold=0.5, inactivity_std_dev_threshold=4, activity_std_dev_lower_threshold=15,
activity_std_dev_upper_threshold=100):
self.hand_detector = HandDetector(confidence=0.6)
self.hand_classifier = HandClassifier()
"""
:param finish_drawing_sign: Sign for finish drawing. Two hands or fist.
:param hand_detector_confidence: The minimal confidence for hand detector to classify detection as hand.
:param enlargebox_px: How much pixels should be added in each side to hand bbox to make it easier to classify.
:param predictions_queue_size: Size of last predictions queue.
:param drawing_state_threshold: Threshold of how many of the last predictions stored in the queue must be
assigned to either of the class to determine which class it is.
:param inactivity_std_dev_threshold: A maximum threshold of movement's standard deviation
to determine if stop sign appeared.
:param activity_std_dev_lower_threshold: A minimum threshold of movement's standard deviation to determine if
hand is in drawing state
:param activity_std_dev_upper_threshold: A maximum threshold of movement's standard deviation to determine if
hand is in drawing state and if it's not outlier.
"""

self.finish_drawing_sign = self.TWO_HANDS_FINISH if finish_drawing_sign == "two_hands" else self.FIST_FINISH

if self.finish_drawing_sign == self.FIST_FINISH:
# The hand classifier is needed only if finishing the drawing by a fist was selected
self.hand_classifier = HandClassifier()

self.hand_detector = HandDetector(confidence=hand_detector_confidence)

self.enlargebox_pt = enlargebox_px
self.drawing_state_threshold = drawing_state_threshold
self.inactivity_std_dev_threshold = inactivity_std_dev_threshold
self.activity_std_dev_lower_threshold = activity_std_dev_lower_threshold
# When using fist finishing, the minimal standard deviation of the movement has to stay above some threshold;
# otherwise a still palm might be classified as a fist and the drawing might be finished.
self.activity_std_dev_lower_threshold = activity_std_dev_lower_threshold if\
self.finish_drawing_sign == self.FIST_FINISH else 0
self.activity_std_dev_upper_threshold = activity_std_dev_upper_threshold

self.image_size = self.hand_detector.get_image_size()
self.path_image = np.zeros(shape=self.image_size, dtype=np.uint8)

self.last_class_predictions = []
self.last_box_predictions = []
self.queue_size = queue_size
# When using fist finishing, the queue size needs to be bigger, so that more of the last predictions
# are used to determine whether it was a fist or a palm
self.predictions_queue_size = predictions_queue_size if self.finish_drawing_sign == self.FIST_FINISH else\
self.MINIMUM_QUEUE_SIZE

self.drawing_state = False
self.drawing_points = []
@@ -36,26 +68,34 @@ def process_img(self, frame):

if len(boxes_images) > 0:
if len(boxes_images) > 1:
# TODO: Handle it better
# If there's more than one hand, get right hand
# Right hand has minimum x value
# right_hand_index = np.argmin([box[0] for box in boxes])
# boxes_images = [boxes_images[right_hand_index]]
# boxes = [boxes[right_hand_index]]
self.finish_drawing = True
if self.finish_drawing_sign == self.TWO_HANDS_FINISH:
# Finish drawing if two hands were detected.
self.finish_drawing = True
else:
# If there's more than one hand, get right hand
# Right hand has minimum x value
right_hand_index = np.argmin([box[0] for box in boxes])
boxes_images = [boxes_images[right_hand_index]]
boxes = [boxes[right_hand_index]]

if not self.finish_drawing:
for idx, (box_image, box) in enumerate(zip(boxes_images, boxes)):
prediction = self.hand_classifier.predict(box_image, should_preprocess_input=True)
box_middle = [int(box[0] + box[2]/2), int(box[1]+box[3]/2)]
for box_image, box in zip(boxes_images, boxes):

if self.finish_drawing_sign == self.FIST_FINISH:
prediction = self.hand_classifier.predict(box_image, should_preprocess_input=True)
else:
# Create a mock prediction from the classifier, so it always thinks it's a palm
prediction = [0, 1]

box_middle = [int(box[0] + box[2]/2), int(box[1]+box[3]/2)]
self.add_predictions_to_queues(np.argmax(prediction), box_middle)
self.calculate_drawing_state()

if not self.is_outlier:
if self.drawing_state:
cv2.circle(self.path_image, tuple(box_middle), radius=2, color=(0, 255, 0), thickness=-1)
cv2.putText(self.path_image, str(idx), tuple(box_middle), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
cv2.putText(self.path_image, str(len(self.drawing_points)), tuple(box_middle),
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=1, color=(255, 0, 0))
self.drawing_points.append(box_middle)
else:
@@ -86,7 +126,8 @@ def add_predictions_to_queues(self, class_prediction, box_prediction):
self.last_box_predictions.append(box_prediction)

def is_queue_full(self):
if len(self.last_class_predictions) == self.queue_size and len(self.last_box_predictions) == self.queue_size:
if len(self.last_class_predictions) == self.predictions_queue_size and\
len(self.last_box_predictions) == self.predictions_queue_size:
return True
else:
return False
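The thresholds documented above feed a decision step that this diff does not show. A purely hypothetical sketch of how such a majority vote over the prediction queues might look (this is not the repo's actual `calculate_drawing_state`):
```
import numpy as np

def drawing_state_from_queues(class_preds, box_preds, drawing_state_threshold,
                              inactivity_std, activity_std_lower, activity_std_upper):
    # class_preds holds argmax results: 1 = palm, 0 = fist (see process_img above)
    palm_ratio = sum(class_preds) / len(class_preds)
    # Mean per-axis standard deviation of the recent hand-centre positions
    movement_std = float(np.std(box_preds, axis=0).mean())
    # A very large jump between detections is treated as an outlier
    is_outlier = movement_std > activity_std_upper
    drawing = None  # None = keep the previous state
    if palm_ratio >= drawing_state_threshold and movement_std >= activity_std_lower:
        drawing = True   # a moving palm draws
    elif (1 - palm_ratio) >= drawing_state_threshold and movement_std <= inactivity_std:
        drawing = False  # a still fist stops drawing
    return drawing, is_outlier
```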
49 changes: 38 additions & 11 deletions src/main.py
@@ -7,12 +7,20 @@


def development_main(image_source, args):
"""
Main function used to development using built-in camera or file.
:param image_source:
:param args:
:return:
"""
if image_source == "built_camera":
cap = cv2.VideoCapture(args.camera_index)
else:
cap = cv2.VideoCapture(args.filepath)

image_processor = ImageProcessor()
image_processor = ImageProcessor(finish_drawing_sign=args.finish_drawing,
hand_detector_confidence=args.hand_detection_confidence)

while cap.isOpened():
while True:
@@ -39,14 +47,18 @@ def development_main(image_source, args):
cap.release()
cv2.destroyAllWindows()

drone_steering = DroneProcessor(max_area_cm=100)
# speed_values = drone_processing.calculate_speed(drawing_points)
rescaled_points = drone_steering.rescale_points(drawing_points)


def tello_main(args):
image_processor = ImageProcessor()
drone_processor = DroneProcessor(max_area_cm=args.max_area, min_length_between_points_cm=args.min_length)
"""
Main function used to control your drone using hand.
:param args:
:return:
"""
image_processor = ImageProcessor(finish_drawing_sign=args.finish_drawing,
hand_detector_confidence=args.hand_detection_confidence)
drone_processor = DroneProcessor(max_area_cm=args.max_area, min_length_between_points_cm=args.min_length,
starting_move_up_cm=args.takeoff_offset)

# Start pinging tello to prevent it from landing
drone_processor.start_pinging_tello()
@@ -81,10 +93,16 @@ def tello_main(args):
# Finish drawing
drone_processor.finish_drawing()

cv2.destroyAllWindows()


def main(args):
image_source = args.image_source

print(f"Image source: {args.image_source}")
print(f"Finish drawing sign: {args.finish_drawing}")
print(f"Hand detection confidence: {args.hand_detection_confidence}")

if image_source == "built_camera" or image_source == "saved_file":
development_main(image_source=image_source, args=args)
else:
@@ -96,16 +114,25 @@ def main(args):

parser.add_argument("--image_source", metavar="image_source", type=str, default="tello",
choices=["built_camera", "saved_file", "tello"])
parser.add_argument("--finish_drawing", metavar="finish_drawing", type=str, default="two_hands",
choices=["two_hands", "fist"], help="Finish drawing sign")
args, _ = parser.parse_known_args()
if args.image_source == "saved_file":
parser.add_argument("--filepath", metavar="filepath", type=str, required=True)
elif args.image_source == "built_camera":
parser.add_argument("--camera_index", metavar="camera_index", type=int, default=0)
elif args.image_source == "tello":
parser.add_argument("--local_ip", metavar="local_ip", type=str, default="0.0.0.0")
parser.add_argument("--local_port", metavar="local_port", type=int, default=8889)
parser.add_argument("--max_area", metavar="max_area", type=int, default=100)
parser.add_argument("--min_length", metavar="min_length", type=int, default=5)
parser.add_argument("--max_area", metavar="max_area", type=int, default=100,
help="The max area [cm] that drone can use to perform the drawing")
parser.add_argument("--min_length", metavar="min_length", type=int, default=5,
help="Minimum length between points, to reduce number of points from detection")
parser.add_argument("--takeoff_offset", metavar="takeoff_offset", type=int, default=50,
help="Takeoff move up offset in cm.")

parser.add_argument("--hand_detection_confidence", metavar="hand_detection_confidence",
type=float, default=0.6 if args.finish_drawing == "fist" else 0.85,
help="The confidence for hand detector should be lower, because we have to detect fist also."
"For two hands detector the confidence has to be higher to get rid of false positives.")

args, _ = parser.parse_known_args()

Expand Down
