Merge pull request #15 from arekmula/finish_drawing
Finish drawing
arekmula authored Jun 1, 2021
2 parents 0e8bfc8 + 4739e7e commit 84ff4ae
Showing 5 changed files with 120 additions and 44 deletions.
23 changes: 17 additions & 6 deletions README.md
@@ -6,9 +6,11 @@ The images from the camera are being sent to the PC, where your hand and its pos
The detected hand movement is then converted to drone steering commands which makes the drone replicate your movement.

## Steering
- Palm -> Drawing
- Fist -> Stop drawing
- Two hands -> Stop Drawing
There are two methods of drawing. The method can be chosen
by providing the `finish_drawing` argument when running the script (see the example below):
- The first allows the user to draw with **any hand gesture**. The drawing is finished by showing **two hands at once**.
- The second allows the user to draw with the **palm gesture**. The drawing is finished by showing a **fist gesture**.

Note that if more than one hand is shown, the drawing is made by the right hand.
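For example (the `finish_drawing` choices are defined in `main.py` below):
```
python3 main.py --finish_drawing "two_hands"
python3 main.py --finish_drawing "fist"
```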

![alt text](pictures/palm.png "PALM GESTURE")
![alt text](pictures/fist.png "FIST GESTURE")
@@ -51,10 +53,17 @@ directory.
To run the Tello Drawer, use the following commands:
- To run with the Tello drone:
```
python3 main.py --image_source "tello" --local_ip "0.0.0.0" --local_port 8889
python3 main.py
```
When running the script, you can set additional parameters:
```
--finish_drawing - Finish-drawing sign ("two_hands" or "fist")
--max_area - The max area [cm] that the drone can use to perform the drawing
--min_length - Minimum length between points, to reduce the number of points from detection
--takeoff_offset - Takeoff move-up offset in cm
```
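A sample invocation combining these parameters (the values here are illustrative):
```
python3 main.py --finish_drawing "fist" --max_area 150 --min_length 10 --takeoff_offset 80
```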

- You can also run the test drawing with your PC built-in camera or video that you recorded earlier.
- You can also run the test drawing with your built-in PC camera or video that you recorded earlier.
```
python3 main.py --image_source "built_camera" --camera_index 0
python3 main.py --image_source "saved_file" --filepath "path/to/file"
@@ -65,7 +74,7 @@ python3 main.py --image_source "saved_file" --filepath "path/to/file"
The dataset saver helps gather data using the Tello drone for further processing.
It connects to the Tello drone, activates the video stream, and saves each received frame.
```
python3 dataset_saver.py --local_ip "0.0.0.0" --local_port 8889 --save_img True
python3 dataset_saver.py --save_img True
```
- Set the FPS with the `--fps` flag
- Set the dataset saving directory with the `--save_dir` flag
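For example, to save frames at 15 FPS into a custom directory (values are illustrative):
```
python3 dataset_saver.py --save_img True --fps 15 --save_dir "hand_dataset"
```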
@@ -80,3 +89,5 @@ We haven't made any changes to the detector.
We had to split the hand detections into 2 separate classes.
The fist is responsible for the start/stop signal, while the palm is responsible for drawing. To do so, we created a
classifier based on a pretrained EfficientNetB0. The database is available [here](https://www.gti.ssr.upm.es/data/HandGesture_database.html)
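As a rough sketch, a transfer-learning classifier of this kind can be assembled in TensorFlow 2.4 roughly as follows (the function name and head layout are illustrative, not the repo's actual `HandClassifier`):
```
import tensorflow as tf

def build_hand_classifier(input_shape=(224, 224, 3), num_classes=2):
    # Pretrained EfficientNetB0 backbone; tf.keras EfficientNet models
    # expect raw RGB pixels in [0, 255], so no manual rescaling is needed
    backbone = tf.keras.applications.EfficientNetB0(
        include_top=False, weights="imagenet", input_shape=input_shape)
    backbone.trainable = False  # train only the classification head at first

    model = tf.keras.Sequential([
        backbone,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(num_classes, activation="softmax"),  # fist / palm
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model
```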

TODO: Improve accuracy of hand classification in a real environment.
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
opencv-python~=4.5.1.48
tensorflow==2.4.1
tensorflow==2.4.2
numpy~=1.19.5
djitellopy2
13 changes: 5 additions & 8 deletions src/dataset_saver.py
@@ -3,12 +3,13 @@
from argparse import ArgumentParser
from pathlib import Path

from tello import Tello
from djitellopy import Tello


def main(args):
tello = Tello(local_ip=args.local_ip, local_port=args.local_port)

tello = Tello()
tello.connect()
tello.streamon()
# Create directory to save images if it doesn't exist
if args.save_img:
timestamp = str(time.time())
@@ -27,10 +28,8 @@ def main(args):
cv2.destroyAllWindows()
break

img = tello.read()
img = tello.get_frame_read().frame
if img is not None:
# The image received from tello is RGB, OpenCV works in BGR format
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

# Show the image
cv2.imshow("tello", img)
@@ -44,8 +43,6 @@ def main(args):
if __name__ == "__main__":
parser = ArgumentParser()

parser.add_argument("--local_ip", metavar="local_ip", type=str, required=True)
parser.add_argument("--local_port", metavar="local_port", type=int, required=True)
parser.add_argument("--save_img", metavar="save_img", type=bool, default=False)
parser.add_argument("--save_dir", metavar="save_dir", type=str, default="dataset")
parser.add_argument("--fps", metavar="fps", type=int, default=30)
77 changes: 59 additions & 18 deletions src/image_processing/processing.py
@@ -6,24 +6,56 @@


class ImageProcessor:
def __init__(self, enlargebox_px=15, queue_size=20, drawing_state_threshold=0.5,
inactivity_std_dev_threshold=4, activity_std_dev_lower_threshold=15,
TWO_HANDS_FINISH = 0
FIST_FINISH = 1

MINIMUM_QUEUE_SIZE = 5

def __init__(self, finish_drawing_sign, hand_detector_confidence, enlargebox_px=15, predictions_queue_size=20,
drawing_state_threshold=0.5, inactivity_std_dev_threshold=4, activity_std_dev_lower_threshold=15,
activity_std_dev_upper_threshold=100):
self.hand_detector = HandDetector(confidence=0.6)
self.hand_classifier = HandClassifier()
"""
:param finish_drawing_sign: Sign for finish drawing. Two hands or fist.
:param hand_detector_confidence: The minimal confidence for hand detector to classify detection as hand.
:param enlargebox_px: How much pixels should be added in each side to hand bbox to make it easier to classify.
:param predictions_queue_size: Size of last predictions queue.
:param drawing_state_threshold: Threshold of how many of the last predictions stored in the queue must be
assigned to either of the class to determine which class it is.
:param inactivity_std_dev_threshold: A maximum threshold of movement's standard deviation
to determine if stop sign appeared.
:param activity_std_dev_lower_threshold: A minimum threshold of movement's standard deviation to determine if
hand is in drawing state
:param activity_std_dev_upper_threshold: A maximum threshold of movement's standard deviation to determine if
hand is in drawing state and if it's not outlier.
"""

self.finish_drawing_sign = self.TWO_HANDS_FINISH if finish_drawing_sign == "two_hands" else self.FIST_FINISH

if self.finish_drawing_sign == self.FIST_FINISH:
# The hand classifier is needed only if finishing the drawing by a fist was selected
self.hand_classifier = HandClassifier()

self.hand_detector = HandDetector(confidence=hand_detector_confidence)

self.enlargebox_pt = enlargebox_px
self.drawing_state_threshold = drawing_state_threshold
self.inactivity_std_dev_threshold = inactivity_std_dev_threshold
self.activity_std_dev_lower_threshold = activity_std_dev_lower_threshold
# When using fist finishing, the minimal standard deviation of the movement has to stay above some threshold;
# otherwise a still palm might be classified as a fist and the drawing might be finished.
self.activity_std_dev_lower_threshold = activity_std_dev_lower_threshold if\
self.finish_drawing_sign == self.FIST_FINISH else 0
self.activity_std_dev_upper_threshold = activity_std_dev_upper_threshold

self.image_size = self.hand_detector.get_image_size()
self.path_image = np.zeros(shape=self.image_size, dtype=np.uint8)

self.last_class_predictions = []
self.last_box_predictions = []
self.queue_size = queue_size
# When using fist finishing, the queue size needs to be bigger, so that more of the last predictions
# are used to determine whether it was a fist or a palm
self.predictions_queue_size = predictions_queue_size if self.finish_drawing_sign == self.FIST_FINISH else\
self.MINIMUM_QUEUE_SIZE

self.drawing_state = False
self.drawing_points = []
@@ -36,26 +68,34 @@ def process_img(self, frame):

if len(boxes_images) > 0:
if len(boxes_images) > 1:
# TODO: Handle it better
# If there's more than one hand, get right hand
# Right hand has minimum x value
# right_hand_index = np.argmin([box[0] for box in boxes])
# boxes_images = [boxes_images[right_hand_index]]
# boxes = [boxes[right_hand_index]]
self.finish_drawing = True
if self.finish_drawing_sign == self.TWO_HANDS_FINISH:
# Finish drawing if two hands were detected.
self.finish_drawing = True
else:
# If there's more than one hand, get right hand
# Right hand has minimum x value
right_hand_index = np.argmin([box[0] for box in boxes])
boxes_images = [boxes_images[right_hand_index]]
boxes = [boxes[right_hand_index]]

if not self.finish_drawing:
for idx, (box_image, box) in enumerate(zip(boxes_images, boxes)):
prediction = self.hand_classifier.predict(box_image, should_preprocess_input=True)
box_middle = [int(box[0] + box[2]/2), int(box[1]+box[3]/2)]
for box_image, box in zip(boxes_images, boxes):

if self.finish_drawing_sign == self.FIST_FINISH:
prediction = self.hand_classifier.predict(box_image, should_preprocess_input=True)
else:
# Create a mock prediction from the classifier, so it always thinks it's a palm
prediction = [0, 1]

box_middle = [int(box[0] + box[2]/2), int(box[1]+box[3]/2)]
self.add_predictions_to_queues(np.argmax(prediction), box_middle)
self.calculate_drawing_state()

if not self.is_outlier:
if self.drawing_state:
cv2.circle(self.path_image, tuple(box_middle), radius=2, color=(0, 255, 0), thickness=-1)
cv2.putText(self.path_image, str(idx), tuple(box_middle), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
cv2.putText(self.path_image, str(len(self.drawing_points)), tuple(box_middle),
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=1, color=(255, 0, 0))
self.drawing_points.append(box_middle)
else:
@@ -86,7 +126,8 @@ def add_predictions_to_queues(self, class_prediction, box_prediction):
self.last_box_predictions.append(box_prediction)

def is_queue_full(self):
if len(self.last_class_predictions) == self.queue_size and len(self.last_box_predictions) == self.queue_size:
if len(self.last_class_predictions) == self.predictions_queue_size and\
len(self.last_box_predictions) == self.predictions_queue_size:
return True
else:
return False
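The thresholds documented above feed a decision step that this diff does not show. A purely hypothetical sketch of how such a majority vote over the prediction queues might look (this is not the repo's actual `calculate_drawing_state`):
```
import numpy as np

def drawing_state_from_queues(class_preds, box_preds, drawing_state_threshold,
                              inactivity_std, activity_std_lower, activity_std_upper):
    # class_preds holds argmax results: 1 = palm, 0 = fist (see process_img above)
    palm_ratio = sum(class_preds) / len(class_preds)
    # Mean per-axis standard deviation of the recent hand-centre positions
    movement_std = float(np.std(box_preds, axis=0).mean())
    # A very large jump between detections is treated as an outlier
    is_outlier = movement_std > activity_std_upper
    drawing = None  # None = keep the previous state
    if palm_ratio >= drawing_state_threshold and movement_std >= activity_std_lower:
        drawing = True   # a moving palm draws
    elif (1 - palm_ratio) >= drawing_state_threshold and movement_std <= inactivity_std:
        drawing = False  # a still fist stops drawing
    return drawing, is_outlier
```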
49 changes: 38 additions & 11 deletions src/main.py
@@ -7,12 +7,20 @@


def development_main(image_source, args):
"""
Main function used to development using built-in camera or file.
:param image_source:
:param args:
:return:
"""
if image_source == "built_camera":
cap = cv2.VideoCapture(args.camera_index)
else:
cap = cv2.VideoCapture(args.filepath)

image_processor = ImageProcessor()
image_processor = ImageProcessor(finish_drawing_sign=args.finish_drawing,
hand_detector_confidence=args.hand_detection_confidence)

while cap.isOpened():
while True:
@@ -39,14 +47,18 @@ def development_main(image_source, args):
cap.release()
cv2.destroyAllWindows()

drone_steering = DroneProcessor(max_area_cm=100)
# speed_values = drone_processing.calculate_speed(drawing_points)
rescaled_points = drone_steering.rescale_points(drawing_points)


def tello_main(args):
image_processor = ImageProcessor()
drone_processor = DroneProcessor(max_area_cm=args.max_area, min_length_between_points_cm=args.min_length)
"""
Main function used to control your drone using hand.
:param args:
:return:
"""
image_processor = ImageProcessor(finish_drawing_sign=args.finish_drawing,
hand_detector_confidence=args.hand_detection_confidence)
drone_processor = DroneProcessor(max_area_cm=args.max_area, min_length_between_points_cm=args.min_length,
starting_move_up_cm=args.takeoff_offset)

# Start pinging tello to prevent it from landing
drone_processor.start_pinging_tello()
@@ -81,10 +93,16 @@ def tello_main(args):
# Finish drawing
drone_processor.finish_drawing()

cv2.destroyAllWindows()


def main(args):
image_source = args.image_source

print(f"Image source: {args.image_source}")
print(f"Finish drawing sign: {args.finish_drawing}")
print(f"Hand detection confidence: {args.hand_detection_confidence}")

if image_source == "built_camera" or image_source == "saved_file":
development_main(image_source=image_source, args=args)
else:
@@ -96,16 +114,25 @@ def main(args):

parser.add_argument("--image_source", metavar="image_source", type=str, default="tello",
choices=["built_camera", "saved_file", "tello"])
parser.add_argument("--finish_drawing", metavar="finish_drawing", type=str, default="two_hands",
choices=["two_hands", "fist"], help="Finish drawing sign")
args, _ = parser.parse_known_args()
if args.image_source == "saved_file":
parser.add_argument("--filepath", metavar="filepath", type=str, required=True)
elif args.image_source == "built_camera":
parser.add_argument("--camera_index", metavar="camera_index", type=int, default=0)
elif args.image_source == "tello":
parser.add_argument("--local_ip", metavar="local_ip", type=str, default="0.0.0.0")
parser.add_argument("--local_port", metavar="local_port", type=int, default=8889)
parser.add_argument("--max_area", metavar="max_area", type=int, default=100)
parser.add_argument("--min_length", metavar="min_length", type=int, default=5)
parser.add_argument("--max_area", metavar="max_area", type=int, default=100,
help="The max area [cm] that drone can use to perform the drawing")
parser.add_argument("--min_length", metavar="min_length", type=int, default=5,
help="Minimum length between points, to reduce number of points from detection")
parser.add_argument("--takeoff_offset", metavar="takeoff_offset", type=int, default=50,
help="Takeoff move up offset in cm.")

parser.add_argument("--hand_detection_confidence", metavar="hand_detection_confidence",
type=float, default=0.6 if args.finish_drawing == "fist" else 0.85,
help="The confidence for hand detector should be lower, because we have to detect fist also."
"For two hands detector the confidence has to be higher to get rid of false positives.")

args, _ = parser.parse_known_args()

Expand Down
