From e5277c1564f3244f98556507ee0aa3e8c09d0ef0 Mon Sep 17 00:00:00 2001 From: dmaletskiy Date: Thu, 12 Aug 2021 17:28:27 +0300 Subject: [PATCH 1/8] Added example for creating face mesh shared library for android applications --- .../mediapipe/apps/facemeshgpu_shared/BUILD | 97 +++++ .../apps/facemeshgpu_shared/MainActivity.java | 93 +++++ .../apps/facemeshgpu_shared/face_mesh_lib.cpp | 392 ++++++++++++++++++ .../apps/facemeshgpu_shared/face_mesh_lib.h | 93 +++++ 4 files changed, 675 insertions(+) create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp create mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD new file mode 100644 index 0000000000..859a830183 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD @@ -0,0 +1,97 @@ +# Copyright 2019 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
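+
+# This package builds two shared libraries for Android applications:
+#   * libmediapipe_jni.so - the standard MediaPipe JNI layer loaded by the Java
+#     example activity below.
+#   * libmediapipe.so - a plain C/C++ face mesh API (declared in face_mesh_lib.h)
+#     that native code can call directly.
+#
+# A minimal build sketch (the --config name and ABI depend on your local
+# Android SDK/NDK setup and are assumptions, not part of this patch):
+#
+#   bazel build -c opt --config=android_arm64 \
+#     //mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared:libmediapipe.so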
+ +licenses(["notice"]) + +package(default_visibility = ["//visibility:private"]) + +cc_binary( + name = "libmediapipe_jni.so", + linkshared = 1, + linkstatic = 1, + deps = [ + "//mediapipe/graphs/face_mesh:mobile_calculators", + "//mediapipe/java/com/google/mediapipe/framework/jni:mediapipe_framework_jni", + ], +) + +cc_library( + name = "mediapipe_jni_lib", + srcs = [":libmediapipe_jni.so"], + alwayslink = 1, +) + +cc_binary( + name = "libmediapipe.so", + linkshared = 1, + linkstatic = 1, + srcs = ["face_mesh_lib.cpp", "face_mesh_lib.h"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", + + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/modules/face_landmark:face_landmark_front_side_model_cpu_with_face_counter", + ] +) + +cc_library( + name = "mediapipe_lib", + srcs = [":libmediapipe.so"], + alwayslink = 1, +) + +android_binary( + name = "facemeshgpu", + srcs = glob(["*.java"]), + assets = [ + "//mediapipe/graphs/face_mesh:face_mesh_mobile_gpu.binarypb", + "//mediapipe/modules/face_landmark:face_landmark.tflite", + "//mediapipe/modules/face_detection:face_detection_short_range.tflite", + ], + assets_dir = "", + manifest = "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:AndroidManifest.xml", + manifest_values = { + "applicationId": "com.google.mediapipe.apps.facemeshgpu", + "appName": "Face Mesh", + "mainActivity": ".MainActivity", + "cameraFacingFront": "True", + "binaryGraphName": "face_mesh_mobile_gpu.binarypb", + "inputVideoStreamName": "input_video", + "outputVideoStreamName": "output_video", + "flipFramesVertically": "True", + "converterNumBuffers": "2", + }, + multidex = "native", + deps = [ + ":mediapipe_jni_lib", + ":mediapipe_lib", + "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/basic:basic_lib", + "//mediapipe/framework/formats:landmark_java_proto_lite", + "//mediapipe/java/com/google/mediapipe/framework:android_framework", + ], +) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java new file mode 100644 index 0000000000..82c1f44789 --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java @@ -0,0 +1,93 @@ +// Copyright 2019 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.mediapipe.apps.facemeshgpu; + +import android.os.Bundle; +import android.util.Log; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; +import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; +import com.google.mediapipe.framework.AndroidPacketCreator; +import com.google.mediapipe.framework.Packet; +import com.google.mediapipe.framework.PacketGetter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Main activity of MediaPipe face mesh app. */ +public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { + private static final String TAG = "MainActivity"; + + private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces"; + private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks"; + // Max number of faces to detect/process. + private static final int NUM_FACES = 1; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + AndroidPacketCreator packetCreator = processor.getPacketCreator(); + Map inputSidePackets = new HashMap<>(); + inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES)); + processor.setInputSidePackets(inputSidePackets); + + // To show verbose logging, run: + // adb shell setprop log.tag.MainActivity VERBOSE + if (Log.isLoggable(TAG, Log.VERBOSE)) { + processor.addPacketCallback( + OUTPUT_LANDMARKS_STREAM_NAME, + (packet) -> { + Log.v(TAG, "Received multi face landmarks packet."); + List multiFaceLandmarks = + PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); + Log.v( + TAG, + "[TS:" + + packet.getTimestamp() + + "] " + + getMultiFaceLandmarksDebugString(multiFaceLandmarks)); + }); + } + } + + private static String getMultiFaceLandmarksDebugString( + List multiFaceLandmarks) { + if (multiFaceLandmarks.isEmpty()) { + return "No face landmarks"; + } + String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n"; + int faceIndex = 0; + for (NormalizedLandmarkList landmarks : multiFaceLandmarks) { + multiFaceLandmarksStr += + "\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n"; + int landmarkIndex = 0; + for (NormalizedLandmark landmark : landmarks.getLandmarkList()) { + multiFaceLandmarksStr += + "\t\tLandmark [" + + landmarkIndex + + "]: (" + + landmark.getX() + + ", " + + landmark.getY() + + ", " + + landmark.getZ() + + ")\n"; + ++landmarkIndex; + } + ++faceIndex; + } + return multiFaceLandmarksStr; + } +} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp new file mode 100644 index 0000000000..ab8e6d11de --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp @@ -0,0 +1,392 @@ +#include "face_mesh_lib.h" + +MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { + const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, + face_landmark_model_path); + if (!status.ok()) { + LOG(INFO) << "Failed constructing FaceMeshDetector."; + LOG(INFO) << status.message(); + } +} + +absl::Status 
+MPFaceMeshDetector::InitFaceMeshDetector(int numFaces,
+                                         const char *face_detection_model_path,
+                                         const char *face_landmark_model_path) {
+  numFaces = std::max(numFaces, 1);
+
+  if (face_detection_model_path == nullptr) {
+    face_detection_model_path =
+        "mediapipe/modules/face_detection/face_detection_short_range.tflite";
+  }
+
+  if (face_landmark_model_path == nullptr) {
+    face_landmark_model_path =
+        "mediapipe/modules/face_landmark/face_landmark.tflite";
+  }
+
+  // Prepare graph config.
+  auto preparedGraphConfig = absl::StrReplaceAll(
+      graphConfig, {{"$numFaces", std::to_string(numFaces)}});
+  preparedGraphConfig = absl::StrReplaceAll(
+      preparedGraphConfig,
+      {{"$faceDetectionModelPath", face_detection_model_path}});
+  preparedGraphConfig = absl::StrReplaceAll(
+      preparedGraphConfig,
+      {{"$faceLandmarkModelPath", face_landmark_model_path}});
+
+  LOG(INFO) << "Get calculator graph config contents: " << preparedGraphConfig;
+
+  mediapipe::CalculatorGraphConfig config =
+      mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
+          preparedGraphConfig);
+  LOG(INFO) << "Initialize the calculator graph.";
+
+  MP_RETURN_IF_ERROR(graph.Initialize(config));
+
+  LOG(INFO) << "Start running the calculator graph.";
+
+  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller,
+                   graph.AddOutputStreamPoller(kOutputStream_landmarks));
+  ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller,
+                   graph.AddOutputStreamPoller(kOutputStream_faceCount));
+  ASSIGN_OR_RETURN(
+      mediapipe::OutputStreamPoller face_rects_from_landmarks_poller,
+      graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks));
+
+  landmarks_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
+      std::move(landmarks_poller));
+  face_count_poller_ptr = std::make_unique<mediapipe::OutputStreamPoller>(
+      std::move(face_count_poller));
+  face_rects_from_landmarks_poller_ptr =
+      std::make_unique<mediapipe::OutputStreamPoller>(
+          std::move(face_rects_from_landmarks_poller));
+
+  MP_RETURN_IF_ERROR(graph.StartRun({}));
+
+  LOG(INFO) << "MPFaceMeshDetector constructed successfully.";
+
+  return absl::OkStatus();
+}
+
+absl::Status
+MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame,
+                                          cv::Rect *multi_face_bounding_boxes,
+                                          int *numFaces) {
+  if (!numFaces || !multi_face_bounding_boxes) {
+    return absl::InvalidArgumentError(
+        "MPFaceMeshDetector::DetectFacesWithStatus requires notnull pointer to "
+        "save results data.");
+  }
+
+  // Reset face counts.
+  *numFaces = 0;
+  face_count = 0;
+
+  // Wrap Mat into an ImageFrame.
+  auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
+      mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
+      mediapipe::ImageFrame::kDefaultAlignmentBoundary);
+  cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
+  camera_frame.copyTo(input_frame_mat);
+
+  // Send image packet into the graph.
+  static size_t timestamp = 0;
+  MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
+      kInputStream, mediapipe::Adopt(input_frame.release())
+                        .At(mediapipe::Timestamp(timestamp++))));
+
+  // Get face count.
+  mediapipe::Packet face_count_packet;
+  if (!face_count_poller_ptr ||
+      !face_count_poller_ptr->Next(&face_count_packet)) {
+    return absl::CancelledError(
+        "Failed during getting next face_count_packet.");
+  }
+
+  auto &face_count_val = face_count_packet.Get<int>();
+
+  if (face_count_val <= 0) {
+    return absl::OkStatus();
+  }
+
+  // Get face bounding boxes.
+  mediapipe::Packet face_rects_from_landmarks_packet;
+  if (!face_rects_from_landmarks_poller_ptr ||
+      !face_rects_from_landmarks_poller_ptr->Next(
+          &face_rects_from_landmarks_packet)) {
+    return absl::CancelledError(
+        "Failed during getting next face_rects_from_landmarks_packet.");
+  }
+
+  auto &face_bounding_boxes =
+      face_rects_from_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedRect>>();
+
+  image_width = camera_frame.cols;
+  image_height = camera_frame.rows;
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert std::vector<NormalizedRect> (center based Rects) to cv::Rect*
+  // (leftTop based Rects).
+  for (int i = 0; i < face_count_val; ++i) {
+    const auto &normalized_bounding_box = face_bounding_boxes[i];
+    auto &bounding_box = multi_face_bounding_boxes[i];
+
+    const auto width =
+        static_cast<int>(normalized_bounding_box.width() * image_width_f);
+    const auto height =
+        static_cast<int>(normalized_bounding_box.height() * image_height_f);
+
+    bounding_box.x =
+        static_cast<int>(normalized_bounding_box.x_center() * image_width_f) -
+        (width >> 1);
+    bounding_box.y =
+        static_cast<int>(normalized_bounding_box.y_center() * image_height_f) -
+        (height >> 1);
+    bounding_box.width = width;
+    bounding_box.height = height;
+  }
+
+  // Get face landmarks.
+  if (!landmarks_poller_ptr ||
+      !landmarks_poller_ptr->Next(&face_landmarks_packet)) {
+    return absl::CancelledError("Failed during getting next landmarks_packet.");
+  }
+
+  *numFaces = face_count_val;
+  face_count = face_count_val;
+
+  return absl::OkStatus();
+}
+
+void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame,
+                                     cv::Rect *multi_face_bounding_boxes,
+                                     int *numFaces) {
+  const auto status =
+      DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message();
+  }
+}
+
+absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
+    cv::Point2f **multi_face_landmarks) {
+
+  if (face_landmarks_packet.IsEmpty()) {
+    return absl::CancelledError("Face landmarks packet is empty.");
+  }
+
+  auto &face_landmarks =
+      face_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
+
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert landmarks to cv::Point2f**.
+  for (int i = 0; i < face_count; ++i) {
+    const auto &normalizedLandmarkList = face_landmarks[i];
+    const auto landmarks_num = normalizedLandmarkList.landmark_size();
+
+    if (landmarks_num != kLandmarksNum) {
+      return absl::CancelledError("Detected unexpected landmarks number.");
+    }
+
+    auto &face_landmarks = multi_face_landmarks[i];
+
+    for (int j = 0; j < landmarks_num; ++j) {
+      const auto &landmark = normalizedLandmarkList.landmark(j);
+      face_landmarks[j].x = landmark.x() * image_width_f;
+      face_landmarks[j].y = landmark.y() * image_height_f;
+    }
+  }
+
+  return absl::OkStatus();
+}
+
+absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(
+    cv::Point3f **multi_face_landmarks) {
+
+  if (face_landmarks_packet.IsEmpty()) {
+    return absl::CancelledError("Face landmarks packet is empty.");
+  }
+
+  auto &face_landmarks =
+      face_landmarks_packet
+          .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>();
+
+  const auto image_width_f = static_cast<float>(image_width);
+  const auto image_height_f = static_cast<float>(image_height);
+
+  // Convert landmarks to cv::Point3f**.
+  for (int i = 0; i < face_count; ++i) {
+    const auto &normalized_landmark_list = face_landmarks[i];
+    const auto landmarks_num = normalized_landmark_list.landmark_size();
+
+    if (landmarks_num != kLandmarksNum) {
+      return absl::CancelledError("Detected unexpected landmarks number.");
+    }
+
+    auto &face_landmarks = multi_face_landmarks[i];
+
+    for (int j = 0; j < landmarks_num; ++j) {
+      const auto &landmark = normalized_landmark_list.landmark(j);
+      face_landmarks[j].x = landmark.x() * image_width_f;
+      face_landmarks[j].y = landmark.y() * image_height_f;
+      face_landmarks[j].z = landmark.z();
+    }
+  }
+
+  return absl::OkStatus();
+}
+
+void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks,
+                                         int *numFaces) {
+  *numFaces = 0;
+  const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
+              << status.message();
+  }
+  *numFaces = face_count;
+}
+
+void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks,
+                                         int *numFaces) {
+  *numFaces = 0;
+  const auto status = DetectLandmarksWithStatus(multi_face_landmarks);
+  if (!status.ok()) {
+    LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: "
+              << status.message();
+  }
+  *numFaces = face_count;
+}
+
+extern "C" {
+MPFaceMeshDetector *
+MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
+                            const char *face_landmark_model_path) {
+  return new MPFaceMeshDetector(numFaces, face_detection_model_path,
+                                face_landmark_model_path);
+}
+
+void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) {
+  delete detector;
+}
+
+void MPFaceMeshDetectorDetectFaces(
+    MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
+    cv::Rect *multi_face_bounding_boxes, int *numFaces) {
+  detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces);
+}
+void
+MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point2f **multi_face_landmarks,
+                                    int *numFaces) {
+  detector->DetectLandmarks(multi_face_landmarks, numFaces);
+}
+void
+MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point3f **multi_face_landmarks,
+                                    int *numFaces) {
+  detector->DetectLandmarks(multi_face_landmarks, numFaces);
+}
+
+int MPFaceMeshDetectorLandmarksNum() {
+  return MPFaceMeshDetector::kLandmarksNum;
+}
+}
+
+const std::string MPFaceMeshDetector::graphConfig = R"pb(
+# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU.
+
+# Input image. (ImageFrame)
+input_stream: "input_video"
+
+# Collection of detected/processed faces, each represented as a list of
+# landmarks. (std::vector<NormalizedLandmarkList>)
+output_stream: "multi_face_landmarks"
+
+# Detected faces count. (int)
+output_stream: "face_count"
+
+# Regions of interest calculated based on landmarks.
+# (std::vector<NormalizedRect>)
+output_stream: "face_rects_from_landmarks"
+
+node {
+  calculator: "FlowLimiterCalculator"
+  input_stream: "input_video"
+  input_stream: "FINISHED:face_count"
+  input_stream_info: {
+    tag_index: "FINISHED"
+    back_edge: true
+  }
+  output_stream: "throttled_input_video"
+}
+
+# Defines side packets for further use in the graph.
+node {
+  calculator: "ConstantSidePacketCalculator"
+  output_side_packet: "PACKET:num_faces"
+  node_options: {
+    [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: {
+      packet { int_value: $numFaces }
+    }
+  }
+}
+
+# Defines side packets for further use in the graph.
+node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:face_detection_model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { string_value: "$faceDetectionModelPath" } + } + } +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:face_landmark_model_path" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { string_value: "$faceLandmarkModelPath" } + } + } +} + +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:0:face_detection_model_path" + input_side_packet: "FILE_PATH:1:face_landmark_model_path" + output_side_packet: "CONTENTS:0:face_detection_model_blob" + output_side_packet: "CONTENTS:1:face_landmark_model_blob" +} + +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:face_detection_model_blob" + output_side_packet: "MODEL:face_detection_model" +} +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:face_landmark_model_blob" + output_side_packet: "MODEL:face_landmark_model" +} + + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontSideModelCpuWithFaceCounter" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "MODEL:0:face_detection_model" + input_side_packet: "MODEL:1:face_landmark_model" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count" +} + +)pb"; diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h new file mode 100644 index 0000000000..32f64492ec --- /dev/null +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h @@ -0,0 +1,93 @@ +#ifndef FACE_MESH_LIBRARY_H +#define FACE_MESH_LIBRARY_H + +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/str_replace.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/output_stream_poller.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_video_inc.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" + +class MPFaceMeshDetector { + public: + MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, + const char *face_landmark_model_path); + + void DetectFaces(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces); + + void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); + void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); + + static constexpr auto kLandmarksNum = 468; + 
+ private: + absl::Status InitFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path); + absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces); + + + static constexpr auto kInputStream = "input_video"; + static constexpr auto kOutputStream_landmarks = "multi_face_landmarks"; + static constexpr auto kOutputStream_faceCount = "face_count"; + static constexpr auto kOutputStream_face_rects_from_landmarks = "face_rects_from_landmarks"; + + static const std::string graphConfig; + + mediapipe::CalculatorGraph graph; + + std::unique_ptr landmarks_poller_ptr; + std::unique_ptr face_count_poller_ptr; + std::unique_ptr face_rects_from_landmarks_poller_ptr; + + int face_count; + int image_width; + int image_height; + mediapipe::Packet face_landmarks_packet; + }; + +#ifdef __cplusplus +extern "C" { +#endif +MPFaceMeshDetector * +MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, + const char *face_landmark_model_path); + +void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector); + +void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector *detector, const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces); + +void +MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, + cv::Point2f **multi_face_landmarks, + int *numFaces); +void +MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, + cv::Point3f **multi_face_landmarks, + int *numFaces); + +int MPFaceMeshDetectorLandmarksNum(); + +#ifdef __cplusplus +}; +#endif +#endif \ No newline at end of file From 2828ccebba4e579c85b802433d4532bc9923f1fa Mon Sep 17 00:00:00 2001 From: maletsden Date: Thu, 26 Aug 2021 17:18:08 +0300 Subject: [PATCH 2/8] Merge. PR #1. 
Face mesh Windows DLL desktop example --- mediapipe/calculators/util/BUILD | 14 + .../util/counting_vector_size_calculator.cc | 26 ++ .../util/counting_vector_size_calculator.h | 79 ++++ .../examples/desktop/face_mesh_dll/BUILD | 66 +++ .../desktop/face_mesh_dll/face_mesh_cpu.cpp | 96 +++++ .../desktop/face_mesh_dll/face_mesh_lib.cpp | 398 ++++++++++++++++++ .../desktop/face_mesh_dll/face_mesh_lib.h | 105 +++++ .../face_mesh_dll/windows_dll_library.bzl | 62 +++ mediapipe/modules/face_detection/BUILD | 12 + ...detection_short_range_side_model_cpu.pbtxt | 86 ++++ mediapipe/modules/face_landmark/BUILD | 60 +++ ...landmark_front_cpu_with_face_counter.pbtxt | 249 +++++++++++ ...ont_side_model_cpu_with_face_counter.pbtxt | 256 +++++++++++ .../face_landmark_side_model_cpu.pbtxt | 143 +++++++ 14 files changed, 1652 insertions(+) create mode 100644 mediapipe/calculators/util/counting_vector_size_calculator.cc create mode 100644 mediapipe/calculators/util/counting_vector_size_calculator.h create mode 100644 mediapipe/examples/desktop/face_mesh_dll/BUILD create mode 100644 mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp create mode 100644 mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp create mode 100644 mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h create mode 100644 mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl create mode 100644 mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt create mode 100644 mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt create mode 100644 mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt create mode 100644 mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt diff --git a/mediapipe/calculators/util/BUILD b/mediapipe/calculators/util/BUILD index e759ff9902..869b4387e6 100644 --- a/mediapipe/calculators/util/BUILD +++ b/mediapipe/calculators/util/BUILD @@ -18,6 +18,20 @@ licenses(["notice"]) package(default_visibility = ["//visibility:public"]) +cc_library( + name = "counting_vector_size_calculator", + srcs = ["counting_vector_size_calculator.cc"], + hdrs = ["counting_vector_size_calculator.h"], + visibility = [ + "//visibility:public", + ], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:landmark_cc_proto", + ], + alwayslink = 1, +) + cc_library( name = "alignment_points_to_rects_calculator", srcs = ["alignment_points_to_rects_calculator.cc"], diff --git a/mediapipe/calculators/util/counting_vector_size_calculator.cc b/mediapipe/calculators/util/counting_vector_size_calculator.cc new file mode 100644 index 0000000000..c2203686f6 --- /dev/null +++ b/mediapipe/calculators/util/counting_vector_size_calculator.cc @@ -0,0 +1,26 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "mediapipe/calculators/util/counting_vector_size_calculator.h" + +#include "mediapipe/framework/formats/landmark.pb.h" + +namespace mediapipe { + +typedef CountingVectorSizeCalculator< + std::vector<::mediapipe::NormalizedLandmarkList>> + CountingNormalizedLandmarkListVectorSizeCalculator; + +REGISTER_CALCULATOR(CountingNormalizedLandmarkListVectorSizeCalculator); +} // namespace mediapipe diff --git a/mediapipe/calculators/util/counting_vector_size_calculator.h b/mediapipe/calculators/util/counting_vector_size_calculator.h new file mode 100644 index 0000000000..4921d3c277 --- /dev/null +++ b/mediapipe/calculators/util/counting_vector_size_calculator.h @@ -0,0 +1,79 @@ +// Copyright 2020 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H +#define MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/formats/landmark.pb.h" + +namespace mediapipe { + +// A calculator that counts the size of the input vector. It was created to +// aid in polling packets in the output stream synchronously. If there is +// a clock stream, it will output a value of 0 even if the input vector stream +// is empty. If not, it will output some value only if there is an input vector. +// The clock stream must have the same time stamp as the vector stream, and +// it must be the stream where packets are transmitted while the graph is +// running. (e.g. 
any input stream of the graph)
+//
+// It is designed to be used like:
+//
+// Example config:
+// node {
+//   calculator: "CountingWithVectorSizeCalculator"
+//   input_stream: "CLOCK:trigger_signal"
+//   input_stream: "VECTOR:input_vector"
+//   output_stream: "COUNT:vector_count"
+// }
+//
+// node {
+//   calculator: "CountingWithVectorSizeCalculator"
+//   input_stream: "VECTOR:input_vector"
+//   output_stream: "COUNT:vector_count"
+// }
+
+template <typename VectorT>
+class CountingVectorSizeCalculator : public CalculatorBase {
+public:
+  static ::mediapipe::Status GetContract(CalculatorContract *cc) {
+    if (cc->Inputs().HasTag("CLOCK")) {
+      cc->Inputs().Tag("CLOCK").SetAny();
+    }
+
+    RET_CHECK(cc->Inputs().HasTag("VECTOR"));
+    cc->Inputs().Tag("VECTOR").Set<VectorT>();
+    RET_CHECK(cc->Outputs().HasTag("COUNT"));
+    cc->Outputs().Tag("COUNT").Set<int>();
+
+    return ::mediapipe::OkStatus();
+  }
+
+  ::mediapipe::Status Process(CalculatorContext *cc) {
+    std::unique_ptr<int> face_count;
+    if (!cc->Inputs().Tag("VECTOR").IsEmpty()) {
+      const auto &landmarks = cc->Inputs().Tag("VECTOR").Get<VectorT>();
+      face_count = absl::make_unique<int>(landmarks.size());
+    } else {
+      face_count = absl::make_unique<int>(0);
+    }
+    cc->Outputs().Tag("COUNT").Add(face_count.release(), cc->InputTimestamp());
+
+    return ::mediapipe::OkStatus();
+  };
+};
+
+} // namespace mediapipe
+
+#endif // MEDIAPIPE_CALCULATORS_UTIL_COUNTING_VECTOR_SIZE_CALCULATOR_H
diff --git a/mediapipe/examples/desktop/face_mesh_dll/BUILD b/mediapipe/examples/desktop/face_mesh_dll/BUILD
new file mode 100644
index 0000000000..8b60297901
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/BUILD
@@ -0,0 +1,66 @@
+# Copyright 2019 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("windows_dll_library.bzl", "windows_dll_library")
+
+licenses(["notice"])
+
+filegroup(
+    name = "srcs",
+    srcs = glob(["**"]),
+    visibility = ["//examples:__pkg__"],
+)
+
+package(default_visibility = ["//mediapipe/examples:__subpackages__"])
+
+# Define the shared library
+windows_dll_library(
+    name = "face_mesh_lib",
+    srcs = ["face_mesh_lib.cpp"],
+    hdrs = ["face_mesh_lib.h"],
+    # Define COMPILING_DLL to export symbols during compiling the DLL.
+ copts = ["-DCOMPILING_DLL"], + deps = [ + "//mediapipe/framework:calculator_framework", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", + "//mediapipe/framework/port:file_helpers", + "//mediapipe/framework/port:opencv_highgui", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/framework/port:opencv_video", + "//mediapipe/framework/port:parse_text_proto", + "//mediapipe/framework/port:status", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/flags:parse", + + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:flow_limiter_calculator", + "//mediapipe/calculators/tflite:tflite_model_calculator", + "//mediapipe/calculators/util:local_file_contents_calculator", + "//mediapipe/modules/face_landmark:face_landmark_front_side_model_cpu_with_face_counter", + ] +) + +# **Implicitly link to face_mesh_lib.dll** + +## Link to face_mesh_lib.dll through its import library. +cc_binary( + name = "face_mesh_cpu", + srcs = ["face_mesh_cpu.cpp"], + deps = [ + ":face_mesh_lib", + ], +) \ No newline at end of file diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp new file mode 100644 index 0000000000..83762a1a13 --- /dev/null +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_cpu.cpp @@ -0,0 +1,96 @@ +#include "face_mesh_lib.h" + +int main(int argc, char **argv) { + google::InitGoogleLogging(argv[0]); + absl::ParseCommandLine(argc, argv); + + cv::VideoCapture capture; + capture.open(0); + if (!capture.isOpened()) { + return -1; + } + + constexpr char kWindowName[] = "MediaPipe"; + + cv::namedWindow(kWindowName, /*flags=WINDOW_AUTOSIZE*/ 1); +#if (CV_MAJOR_VERSION >= 3) && (CV_MINOR_VERSION >= 2) + capture.set(cv::CAP_PROP_FRAME_WIDTH, 640); + capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480); + capture.set(cv::CAP_PROP_FPS, 30); +#endif + + LOG(INFO) << "VideoCapture initialized."; + + // Maximum number of faces that can be detected + constexpr int maxNumFaces = 1; + constexpr char face_detection_model_path[] = + "mediapipe/modules/face_detection/face_detection_short_range.tflite"; + constexpr char face_landmark_model_path[] = + "mediapipe/modules/face_landmark/face_landmark.tflite"; + + MPFaceMeshDetector *faceMeshDetector = MPFaceMeshDetectorConstruct( + maxNumFaces, face_detection_model_path, face_landmark_model_path); + + // Allocate memory for face landmarks. + auto multiFaceLandmarks = new cv::Point2f *[maxNumFaces]; + for (int i = 0; i < maxNumFaces; ++i) { + multiFaceLandmarks[i] = new cv::Point2f[MPFaceMeshDetectorLandmarksNum]; + } + + std::vector multiFaceBoundingBoxes(maxNumFaces); + + LOG(INFO) << "FaceMeshDetector constructed."; + + LOG(INFO) << "Start grabbing and processing frames."; + bool grab_frames = true; + + while (grab_frames) { + // Capture opencv camera. 
+ cv::Mat camera_frame_raw; + capture >> camera_frame_raw; + if (camera_frame_raw.empty()) { + LOG(INFO) << "Ignore empty frames from camera."; + continue; + } + + cv::Mat camera_frame; + cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB); + + int faceCount = 0; + + MPFaceMeshDetectorDetectFaces(faceMeshDetector, camera_frame, + multiFaceBoundingBoxes.data(), &faceCount); + + if (faceCount > 0) { + auto &face_bounding_box = multiFaceBoundingBoxes[0]; + + cv::rectangle(camera_frame_raw, face_bounding_box, cv::Scalar(0, 255, 0), + 3); + + int landmarksNum = 0; + MPFaceMeshDetectorDetect2DLandmarks(faceMeshDetector, multiFaceLandmarks, + &landmarksNum); + auto &face_landmarks = multiFaceLandmarks[0]; + auto &landmark = face_landmarks[0]; + + LOG(INFO) << "First landmark: x - " << landmark.x << ", y - " + << landmark.y; + } + + const int pressed_key = cv::waitKey(5); + if (pressed_key >= 0 && pressed_key != 255) + grab_frames = false; + + cv::imshow(kWindowName, camera_frame_raw); + } + + LOG(INFO) << "Shutting down."; + + // Deallocate memory for face landmarks. + for (int i = 0; i < maxNumFaces; ++i) { + delete[] multiFaceLandmarks[i]; + } + delete[] multiFaceLandmarks; + + MPFaceMeshDetectorDestruct(faceMeshDetector); +} \ No newline at end of file diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp new file mode 100644 index 0000000000..b3082e58ca --- /dev/null +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.cpp @@ -0,0 +1,398 @@ +#include "face_mesh_lib.h" + +MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { + const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, + face_landmark_model_path); + if (!status.ok()) { + LOG(INFO) << "Failed constructing FaceMeshDetector."; + LOG(INFO) << status.message(); + } +} + +absl::Status +MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { + numFaces = std::max(numFaces, 1); + + if (face_detection_model_path == nullptr) { + face_detection_model_path = + "mediapipe/modules/face_detection/face_detection_short_range.tflite"; + } + + if (face_landmark_model_path == nullptr) { + face_landmark_model_path = + "mediapipe/modules/face_landmark/face_landmark.tflite"; + } + + // Prepare graph config. 
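+  // Note: graphConfig (defined at the bottom of this file) is a text-format
+  // CalculatorGraphConfig template; the $numFaces, $faceDetectionModelPath and
+  // $faceLandmarkModelPath placeholders are substituted as plain text below,
+  // before the config is parsed.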
+ auto preparedGraphConfig = absl::StrReplaceAll( + graphConfig, {{"$numFaces", std::to_string(numFaces)}}); + preparedGraphConfig = absl::StrReplaceAll( + preparedGraphConfig, + {{"$faceDetectionModelPath", face_detection_model_path}}); + preparedGraphConfig = absl::StrReplaceAll( + preparedGraphConfig, + {{"$faceLandmarkModelPath", face_landmark_model_path}}); + + LOG(INFO) << "Get calculator graph config contents: " << preparedGraphConfig; + + mediapipe::CalculatorGraphConfig config = + mediapipe::ParseTextProtoOrDie( + preparedGraphConfig); + LOG(INFO) << "Initialize the calculator graph."; + + MP_RETURN_IF_ERROR(graph.Initialize(config)); + + LOG(INFO) << "Start running the calculator graph."; + + ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller, + graph.AddOutputStreamPoller(kOutputStream_landmarks)); + ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller, + graph.AddOutputStreamPoller(kOutputStream_faceCount)); + ASSIGN_OR_RETURN( + mediapipe::OutputStreamPoller face_rects_from_landmarks_poller, + graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks)); + + landmarks_poller_ptr = std::make_unique( + std::move(landmarks_poller)); + face_count_poller_ptr = std::make_unique( + std::move(face_count_poller)); + face_rects_from_landmarks_poller_ptr = + std::make_unique( + std::move(face_rects_from_landmarks_poller)); + + MP_RETURN_IF_ERROR(graph.StartRun({})); + + LOG(INFO) << "MPFaceMeshDetector constructed successfully."; + + return absl::OkStatus(); +} + +absl::Status +MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces) { + if (!numFaces || !multi_face_bounding_boxes) { + return absl::InvalidArgumentError( + "MPFaceMeshDetector::DetectFacesWithStatus requires notnull pointer to " + "save results data."); + } + + // Reset face counts. + *numFaces = 0; + face_count = 0; + + // Wrap Mat into an ImageFrame. + auto input_frame = absl::make_unique( + mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows, + mediapipe::ImageFrame::kDefaultAlignmentBoundary); + cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get()); + camera_frame.copyTo(input_frame_mat); + + // Send image packet into the graph. + size_t frame_timestamp_us = static_cast(cv::getTickCount()) / + static_cast(cv::getTickFrequency()) * 1e6; + MP_RETURN_IF_ERROR(graph.AddPacketToInputStream( + kInputStream, mediapipe::Adopt(input_frame.release()) + .At(mediapipe::Timestamp(frame_timestamp_us)))); + + // Get face count. + mediapipe::Packet face_count_packet; + if (!face_count_poller_ptr || + !face_count_poller_ptr->Next(&face_count_packet)) { + return absl::CancelledError( + "Failed during getting next face_count_packet."); + } + + auto &face_count_val = face_count_packet.Get(); + + if (face_count_val <= 0) { + return absl::OkStatus(); + } + + // Get face bounding boxes. 
+ mediapipe::Packet face_rects_from_landmarks_packet; + if (!face_rects_from_landmarks_poller_ptr || + !face_rects_from_landmarks_poller_ptr->Next( + &face_rects_from_landmarks_packet)) { + return absl::CancelledError( + "Failed during getting next face_rects_from_landmarks_packet."); + } + + auto &face_bounding_boxes = + face_rects_from_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedRect>>(); + + image_width = camera_frame.cols; + image_height = camera_frame.rows; + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + + // Convert vector (center based Rects) to cv::Rect* + // (leftTop based Rects). + for (int i = 0; i < face_count_val; ++i) { + const auto &normalized_bounding_box = face_bounding_boxes[i]; + auto &bounding_box = multi_face_bounding_boxes[i]; + + const auto width = + static_cast(normalized_bounding_box.width() * image_width_f); + const auto height = + static_cast(normalized_bounding_box.height() * image_height_f); + + bounding_box.x = + static_cast(normalized_bounding_box.x_center() * image_width_f) - + (width >> 1); + bounding_box.y = + static_cast(normalized_bounding_box.y_center() * image_height_f) - + (height >> 1); + bounding_box.width = width; + bounding_box.height = height; + } + + // Get face landmarks. + if (!landmarks_poller_ptr || + !landmarks_poller_ptr->Next(&face_landmarks_packet)) { + return absl::CancelledError("Failed during getting next landmarks_packet."); + } + + *numFaces = face_count_val; + face_count = face_count_val; + + return absl::OkStatus(); +} + +void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, + int *numFaces) { + const auto status = + DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message(); + } +} +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point2f **multi_face_landmarks) { + + if (face_landmarks_packet.IsEmpty()) { + return absl::CancelledError("Face landmarks packet is empty."); + } + + auto &face_landmarks = + face_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + + // Convert landmarks to cv::Point2f**. + for (int i = 0; i < face_count; ++i) { + const auto &normalizedLandmarkList = face_landmarks[i]; + const auto landmarks_num = normalizedLandmarkList.landmark_size(); + + if (landmarks_num != kLandmarksNum) { + return absl::CancelledError("Detected unexpected landmarks number."); + } + + auto &face_landmarks = multi_face_landmarks[i]; + + for (int j = 0; j < landmarks_num; ++j) { + const auto &landmark = normalizedLandmarkList.landmark(j); + face_landmarks[j].x = landmark.x() * image_width_f; + face_landmarks[j].y = landmark.y() * image_height_f; + } + } + + return absl::OkStatus(); +} + +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point3f **multi_face_landmarks) { + + if (face_landmarks_packet.IsEmpty()) { + return absl::CancelledError("Face landmarks packet is empty."); + } + + auto &face_landmarks = + face_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + + const auto image_width_f = static_cast(image_width); + const auto image_height_f = static_cast(image_height); + + // Convert landmarks to cv::Point3f**. 
+ for (int i = 0; i < face_count; ++i) { + const auto &normalized_landmark_list = face_landmarks[i]; + const auto landmarks_num = normalized_landmark_list.landmark_size(); + + if (landmarks_num != kLandmarksNum) { + return absl::CancelledError("Detected unexpected landmarks number."); + } + + auto &face_landmarks = multi_face_landmarks[i]; + + for (int j = 0; j < landmarks_num; ++j) { + const auto &landmark = normalized_landmark_list.landmark(j); + face_landmarks[j].x = landmark.x() * image_width_f; + face_landmarks[j].y = landmark.y() * image_height_f; + face_landmarks[j].z = landmark.z(); + } + } + + return absl::OkStatus(); +} + +void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks, + int *numFaces) { + *numFaces = 0; + const auto status = DetectLandmarksWithStatus(multi_face_landmarks); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " + << status.message(); + } + *numFaces = face_count; +} + +void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks, + int *numFaces) { + *numFaces = 0; + const auto status = DetectLandmarksWithStatus(multi_face_landmarks); + if (!status.ok()) { + LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " + << status.message(); + } + *numFaces = face_count; +} + +extern "C" { +DLLEXPORT MPFaceMeshDetector * +MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, + const char *face_landmark_model_path) { + return new MPFaceMeshDetector(numFaces, face_detection_model_path, + face_landmark_model_path); +} + +DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) { + delete detector; +} + +DLLEXPORT void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector *detector, const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces) { + detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces); +} +DLLEXPORT void +MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, + cv::Point2f **multi_face_landmarks, + int *numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); +} +DLLEXPORT void +MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, + cv::Point3f **multi_face_landmarks, + int *numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); +} + +DLLEXPORT const int MPFaceMeshDetectorLandmarksNum = + MPFaceMeshDetector::kLandmarksNum; +} + +const std::string MPFaceMeshDetector::graphConfig = R"pb( +# MediaPipe graph that performs face mesh with TensorFlow Lite on CPU. + +# Input image. (ImageFrame) +input_stream: "input_video" + +# Collection of detected/processed faces, each represented as a list of +# landmarks. (std::vector) +output_stream: "multi_face_landmarks" + +# Detected faces count. (int) +output_stream: "face_count" + +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "face_rects_from_landmarks" + +node { + calculator: "FlowLimiterCalculator" + input_stream: "input_video" + input_stream: "FINISHED:face_count" + input_stream_info: { + tag_index: "FINISHED" + back_edge: true + } + output_stream: "throttled_input_video" +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:num_faces" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { int_value: $numFaces } + } + } +} + +# Defines side packets for further use in the graph. 
+node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:face_detection_model_path" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { string_value: "$faceDetectionModelPath" } + } + } +} + +# Defines side packets for further use in the graph. +node { + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:face_landmark_model_path" + node_options: { + [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { + packet { string_value: "$faceLandmarkModelPath" } + } + } +} + +node { + calculator: "LocalFileContentsCalculator" + input_side_packet: "FILE_PATH:0:face_detection_model_path" + input_side_packet: "FILE_PATH:1:face_landmark_model_path" + output_side_packet: "CONTENTS:0:face_detection_model_blob" + output_side_packet: "CONTENTS:1:face_landmark_model_blob" +} + +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:face_detection_model_blob" + output_side_packet: "MODEL:face_detection_model" +} +node { + calculator: "TfLiteModelCalculator" + input_side_packet: "MODEL_BLOB:face_landmark_model_blob" + output_side_packet: "MODEL:face_landmark_model" +} + + +# Subgraph that detects faces and corresponding landmarks. +node { + calculator: "FaceLandmarkFrontSideModelCpuWithFaceCounter" + input_stream: "IMAGE:throttled_input_video" + input_side_packet: "NUM_FACES:num_faces" + input_side_packet: "MODEL:0:face_detection_model" + input_side_packet: "MODEL:1:face_landmark_model" + output_stream: "LANDMARKS:multi_face_landmarks" + output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" + output_stream: "DETECTIONS:face_detections" + output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count" +} + +)pb"; diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h new file mode 100644 index 0000000000..6705b42e66 --- /dev/null +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h @@ -0,0 +1,105 @@ +#ifndef FACE_MESH_LIBRARY_H +#define FACE_MESH_LIBRARY_H + +#ifdef COMPILING_DLL +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT __declspec(dllimport) +#endif + +#include +#include +#include +#include + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "absl/strings/str_replace.h" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/framework/formats/image_frame_opencv.h" +#include "mediapipe/framework/formats/landmark.pb.h" +#include "mediapipe/framework/formats/rect.pb.h" +#include "mediapipe/framework/output_stream_poller.h" +#include "mediapipe/framework/port/file_helpers.h" +#include "mediapipe/framework/port/opencv_highgui_inc.h" +#include "mediapipe/framework/port/opencv_imgproc_inc.h" +#include "mediapipe/framework/port/opencv_video_inc.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" + +class MPFaceMeshDetector { +public: + MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, + const char *face_landmark_model_path); + + void DetectFaces(const cv::Mat &camera_frame, + cv::Rect *multi_face_bounding_boxes, int *numFaces); + + void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); + void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); + + static constexpr auto 
kLandmarksNum = 468;
+
+private:
+  absl::Status InitFaceMeshDetector(int numFaces,
+                                    const char *face_detection_model_path,
+                                    const char *face_landmark_model_path);
+  absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame,
+                                     cv::Rect *multi_face_bounding_boxes,
+                                     int *numFaces);
+
+  absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks);
+  absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks);
+
+  static constexpr auto kInputStream = "input_video";
+  static constexpr auto kOutputStream_landmarks = "multi_face_landmarks";
+  static constexpr auto kOutputStream_faceCount = "face_count";
+  static constexpr auto kOutputStream_face_rects_from_landmarks =
+      "face_rects_from_landmarks";
+
+  static const std::string graphConfig;
+
+  mediapipe::CalculatorGraph graph;
+
+  std::unique_ptr<mediapipe::OutputStreamPoller> landmarks_poller_ptr;
+  std::unique_ptr<mediapipe::OutputStreamPoller> face_count_poller_ptr;
+  std::unique_ptr<mediapipe::OutputStreamPoller>
+      face_rects_from_landmarks_poller_ptr;
+
+  int face_count;
+  int image_width;
+  int image_height;
+  mediapipe::Packet face_landmarks_packet;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+DLLEXPORT MPFaceMeshDetector *
+MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path,
+                            const char *face_landmark_model_path);
+
+DLLEXPORT void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector);
+
+DLLEXPORT void MPFaceMeshDetectorDetectFaces(
+    MPFaceMeshDetector *detector, const cv::Mat &camera_frame,
+    cv::Rect *multi_face_bounding_boxes, int *numFaces);
+
+DLLEXPORT void
+MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point2f **multi_face_landmarks,
+                                    int *numFaces);
+DLLEXPORT void
+MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector,
+                                    cv::Point3f **multi_face_landmarks,
+                                    int *numFaces);
+
+DLLEXPORT extern const int MPFaceMeshDetectorLandmarksNum;
+
+#ifdef __cplusplus
+};
+#endif
+#endif
\ No newline at end of file
diff --git a/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl b/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl
new file mode 100644
index 0000000000..69c243d606
--- /dev/null
+++ b/mediapipe/examples/desktop/face_mesh_dll/windows_dll_library.bzl
@@ -0,0 +1,62 @@
+"""
+This is a simple windows_dll_library rule for building a Windows DLL
+that can be depended on by other cc rules.
+Example usage:
+  windows_dll_library(
+      name = "hellolib",
+      srcs = [
+          "hello-library.cpp",
+      ],
+      hdrs = ["hello-library.h"],
+      # Define COMPILING_DLL to export symbols during compiling the DLL.
+      copts = ["/DCOMPILING_DLL"],
+  )
+"""
+
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library")
+
+def windows_dll_library(
+        name,
+        srcs = [],
+        deps = [],
+        hdrs = [],
+        visibility = None,
+        **kwargs):
+    """A simple windows_dll_library rule for building a Windows DLL."""
+    dll_name = name + ".dll"
+    import_lib_name = name + "_import_lib"
+    import_target_name = name + "_dll_import"
+
+    # Build the shared library
+    cc_binary(
+        name = dll_name,
+        srcs = srcs + hdrs,
+        deps = deps,
+        linkshared = 1,
+        **kwargs
+    )
+
+    # Get the import library for the dll
+    native.filegroup(
+        name = import_lib_name,
+        srcs = [":" + dll_name],
+        output_group = "interface_library",
+    )
+
+    # Because we cannot directly depend on cc_binary from other cc rules in deps attribute,
+    # we use cc_import as a bridge to depend on the dll.
+ cc_import( + name = import_target_name, + interface_library = ":" + import_lib_name, + shared_library = ":" + dll_name, + ) + + # Create a new cc_library to also include the headers needed for the shared library + cc_library( + name = name, + hdrs = hdrs, + visibility = visibility, + deps = deps + [ + ":" + import_target_name, + ], + ) \ No newline at end of file diff --git a/mediapipe/modules/face_detection/BUILD b/mediapipe/modules/face_detection/BUILD index 839418c77f..4a0b415447 100644 --- a/mediapipe/modules/face_detection/BUILD +++ b/mediapipe/modules/face_detection/BUILD @@ -57,6 +57,18 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_detection_short_range_side_model_cpu", + graph = "face_detection_short_range_side_model_cpu.pbtxt", + register_as = "FaceDetectionShortRangeSideModelCpu", + deps = [ + ":face_detection_short_range_common", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/util:to_image_calculator", + ], +) + mediapipe_simple_subgraph( name = "face_detection_short_range_gpu", graph = "face_detection_short_range_gpu.pbtxt", diff --git a/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt b/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt new file mode 100644 index 0000000000..57639bab24 --- /dev/null +++ b/mediapipe/modules/face_detection/face_detection_short_range_side_model_cpu.pbtxt @@ -0,0 +1,86 @@ +# MediaPipe graph to detect faces. (CPU input, and inference is executed on +# CPU.) +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceDetectionShortRangeCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "MODEL:face_detection_model" +# output_stream: "DETECTIONS:face_detections" +# } + +type: "FaceDetectionShortRangeCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# TfLite model to detect faces. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite +# model only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:face_detection_model" + +# Detected faces. (std::vector) +# NOTE: there will not be an output packet in the DETECTIONS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "DETECTIONS:detections" + +# Converts the input CPU image (ImageFrame) to the multi-backend image type +# (Image). +node: { + calculator: "ToImageCalculator" + input_stream: "IMAGE_CPU:image" + output_stream: "IMAGE:multi_backend_image" +} + +# Transforms the input image into a 128x128 tensor while keeping the aspect +# ratio (what is expected by the corresponding face detection model), resulting +# in potential letterboxing in the transformed image. 
+node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:multi_backend_image" + output_stream: "TENSORS:input_tensors" + output_stream: "MATRIX:transform_matrix" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 128 + output_tensor_height: 128 + keep_aspect_ratio: true + output_tensor_float_range { + min: -1.0 + max: 1.0 + } + border_mode: BORDER_ZERO + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:detection_tensors" + input_side_packet: "MODEL:face_detection_model" + options { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { tflite {} } + } + } +} + +# Performs tensor post processing to generate face detections. +node { + calculator: "FaceDetectionShortRangeCommon" + input_stream: "TENSORS:detection_tensors" + input_stream: "MATRIX:transform_matrix" + output_stream: "DETECTIONS:detections" +} diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD index 77560022eb..6e642d7fc8 100644 --- a/mediapipe/modules/face_landmark/BUILD +++ b/mediapipe/modules/face_landmark/BUILD @@ -37,6 +37,22 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_side_model_cpu", + graph = "face_landmark_side_model_cpu.pbtxt", + register_as = "FaceLandmarkSideModelCpu", + deps = [ + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:split_vector_calculator", + "//mediapipe/calculators/tensor:image_to_tensor_calculator", + "//mediapipe/calculators/tensor:inference_calculator", + "//mediapipe/calculators/tensor:tensors_to_floats_calculator", + "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator", + "//mediapipe/calculators/util:landmark_projection_calculator", + "//mediapipe/calculators/util:thresholding_calculator", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_gpu", graph = "face_landmark_gpu.pbtxt", @@ -74,6 +90,50 @@ mediapipe_simple_subgraph( ], ) +mediapipe_simple_subgraph( + name = "face_landmark_front_cpu_with_face_counter", + graph = "face_landmark_front_cpu_with_face_counter.pbtxt", + register_as = "FaceLandmarkFrontCpuWithFaceCounter", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_cpu", + ":face_landmark_landmarks_to_roi", + "//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:counting_vector_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_cpu", + ], +) + +mediapipe_simple_subgraph( + name = "face_landmark_front_side_model_cpu_with_face_counter", + graph = "face_landmark_front_side_model_cpu_with_face_counter.pbtxt", + register_as = "FaceLandmarkFrontSideModelCpuWithFaceCounter", + deps = [ + ":face_detection_front_detection_to_roi", + ":face_landmark_side_model_cpu", + ":face_landmark_landmarks_to_roi", + 
"//mediapipe/calculators/core:begin_loop_calculator", + "//mediapipe/calculators/core:clip_vector_size_calculator", + "//mediapipe/calculators/core:constant_side_packet_calculator", + "//mediapipe/calculators/core:end_loop_calculator", + "//mediapipe/calculators/core:gate_calculator", + "//mediapipe/calculators/core:previous_loopback_calculator", + "//mediapipe/calculators/image:image_properties_calculator", + "//mediapipe/calculators/util:association_norm_rect_calculator", + "//mediapipe/calculators/util:collection_has_min_size_calculator", + "//mediapipe/calculators/util:counting_vector_size_calculator", + "//mediapipe/modules/face_detection:face_detection_short_range_side_model_cpu", + ], +) + mediapipe_simple_subgraph( name = "face_landmark_front_gpu", graph = "face_landmark_front_gpu.pbtxt", diff --git a/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt new file mode 100644 index 0000000000..5389a82932 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_cpu_with_face_counter.pbtxt @@ -0,0 +1,249 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# It is required that "face_detection_short_range.tflite" is available at +# "mediapipe/modules/face_detection/face_detection_short_range.tflite" +# path during execution. +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# (int) +output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count" + + +# Defines whether landmarks on the previous image should be used to help +# localize landmarks on the current image. 
+node { + name: "ConstantSidePacketCalculator" + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:use_prev_landmarks" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { bool_value: true } + } + } +} +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "gated_prev_face_rects_from_landmarks" +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. +node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:gated_prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if enough faces have already been identified from the +# previous image. Otherwise, passes the incoming image through to trigger a new +# round of face detection. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionShortRangeCpu" + input_stream: "IMAGE:gated_image" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Counting a multi_faceLandmarks vector size. The image stream is only used to +# make the calculator work even when there is no input vector. +node { + calculator: "CountingNormalizedLandmarkListVectorSizeCalculator" + input_stream: "CLOCK:image" + input_stream: "VECTOR:multi_face_landmarks" + output_stream: "COUNT:face_count" +} + + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. 
+node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. +node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "gated_prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. 
+node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt new file mode 100644 index 0000000000..dc83f17b70 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_front_side_model_cpu_with_face_counter.pbtxt @@ -0,0 +1,256 @@ +# MediaPipe graph to detect/predict face landmarks. (CPU input, and inference is +# executed on CPU.) This graph tries to skip face detection as much as possible +# by using previously detected/predicted landmarks for new images. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkFrontSideModelCpu" +# input_stream: "IMAGE:image" +# input_side_packet: "NUM_FACES:num_faces" +# input_side_packet: "MODEL:0:face_detection_model" +# input_side_packet: "MODEL:1:face_landmark_model" +# output_stream: "LANDMARKS:multi_face_landmarks" +# } + +type: "FaceLandmarkFrontSideModelCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" + +# Max number of faces to detect/track. (int) +input_side_packet: "NUM_FACES:num_faces" +# TfLite model to detect faces. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_detection/face_detection_short_range.tflite +# model only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:0:face_detection_model" +# TfLite model to detect face landmarks. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model +# only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:1:face_landmark_model" + +# Collection of detected/predicted faces, each represented as a list of 468 face +# landmarks. (std::vector) +# NOTE: there will not be an output packet in the LANDMARKS stream for this +# particular timestamp if none of faces detected. However, the MediaPipe +# framework will internally inform the downstream calculators of the absence of +# this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:multi_face_landmarks" + +# Extra outputs (for debugging, for instance). +# Detected faces. (std::vector) +output_stream: "DETECTIONS:face_detections" +# Regions of interest calculated based on landmarks. +# (std::vector) +output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" +# Regions of interest calculated based on face detections. +# (std::vector) +output_stream: "ROIS_FROM_DETECTIONS:face_rects_from_detections" + +# (int) +output_stream: "FACE_COUNT_FROM_LANDMARKS:face_count" + + +# Defines whether landmarks on the previous image should be used to help +# localize landmarks on the current image. +node { + name: "ConstantSidePacketCalculator" + calculator: "ConstantSidePacketCalculator" + output_side_packet: "PACKET:use_prev_landmarks" + options: { + [mediapipe.ConstantSidePacketCalculatorOptions.ext]: { + packet { bool_value: true } + } + } +} +node { + calculator: "GateCalculator" + input_side_packet: "ALLOW:use_prev_landmarks" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "gated_prev_face_rects_from_landmarks" +} + +# Determines if an input vector of NormalizedRect has a size greater than or +# equal to the provided num_faces. 
+node { + calculator: "NormalizedRectVectorHasMinSizeCalculator" + input_stream: "ITERABLE:prev_face_rects_from_landmarks" + input_side_packet: "num_faces" + output_stream: "prev_has_enough_faces" +} + +# Drops the incoming image if FaceLandmarkCpu was able to identify face presence +# in the previous image. Otherwise, passes the incoming image through to trigger +# a new round of face detection in FaceDetectionShortRangeCpu. +node { + calculator: "GateCalculator" + input_stream: "image" + input_stream: "DISALLOW:prev_has_enough_faces" + output_stream: "gated_image" + options: { + [mediapipe.GateCalculatorOptions.ext] { + empty_packets_as_allow: true + } + } +} + +# Detects faces. +node { + calculator: "FaceDetectionShortRangeSideModelCpu" + input_stream: "IMAGE:gated_image" + input_side_packet: "MODEL:face_detection_model" + output_stream: "DETECTIONS:all_face_detections" +} + +# Makes sure there are no more detections than the provided num_faces. +node { + calculator: "ClipDetectionVectorSizeCalculator" + input_stream: "all_face_detections" + output_stream: "face_detections" + input_side_packet: "num_faces" +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:gated_image" + output_stream: "SIZE:gated_image_size" +} + +# Outputs each element of face_detections at a fake timestamp for the rest of +# the graph to process. Clones the image size packet for each face_detection at +# the fake timestamp. At the end of the loop, outputs the BATCH_END timestamp +# for downstream calculators to inform them that all elements in the vector have +# been processed. +node { + calculator: "BeginLoopDetectionCalculator" + input_stream: "ITERABLE:face_detections" + input_stream: "CLONE:gated_image_size" + output_stream: "ITEM:face_detection" + output_stream: "CLONE:detections_loop_image_size" + output_stream: "BATCH_END:detections_loop_end_timestamp" +} + +# Calculates region of interest based on face detections, so that can be used +# to detect landmarks. +node { + calculator: "FaceDetectionFrontDetectionToRoi" + input_stream: "DETECTION:face_detection" + input_stream: "IMAGE_SIZE:detections_loop_image_size" + output_stream: "ROI:face_rect_from_detection" +} + +# Counting a multi_faceLandmarks vector size. The image stream is only used to +# make the calculator work even when there is no input vector. +node { + calculator: "CountingNormalizedLandmarkListVectorSizeCalculator" + input_stream: "CLOCK:image" + input_stream: "VECTOR:multi_face_landmarks" + output_stream: "COUNT:face_count" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_detection" + input_stream: "BATCH_END:detections_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_detections" +} + +# Performs association between NormalizedRect vector elements from previous +# image and rects based on face detections from the current image. This +# calculator ensures that the output face_rects vector doesn't contain +# overlapping regions based on the specified min_similarity_threshold. 
+node { + calculator: "AssociationNormRectCalculator" + input_stream: "face_rects_from_detections" + input_stream: "prev_face_rects_from_landmarks" + output_stream: "face_rects" + options: { + [mediapipe.AssociationCalculatorOptions.ext] { + min_similarity_threshold: 0.5 + } + } +} + +# Calculate size of the image. +node { + calculator: "ImagePropertiesCalculator" + input_stream: "IMAGE:image" + output_stream: "SIZE:image_size" +} + +# Outputs each element of face_rects at a fake timestamp for the rest of the +# graph to process. Clones image and image size packets for each +# single_face_rect at the fake timestamp. At the end of the loop, outputs the +# BATCH_END timestamp for downstream calculators to inform them that all +# elements in the vector have been processed. +node { + calculator: "BeginLoopNormalizedRectCalculator" + input_stream: "ITERABLE:face_rects" + input_stream: "CLONE:0:image" + input_stream: "CLONE:1:image_size" + output_stream: "ITEM:face_rect" + output_stream: "CLONE:0:landmarks_loop_image" + output_stream: "CLONE:1:landmarks_loop_image_size" + output_stream: "BATCH_END:landmarks_loop_end_timestamp" +} + +# Detects face landmarks within specified region of interest of the image. +node { + calculator: "FaceLandmarkSideModelCpu" + input_stream: "IMAGE:landmarks_loop_image" + input_stream: "ROI:face_rect" + input_side_packet: "MODEL:face_landmark_model" + output_stream: "LANDMARKS:face_landmarks" +} + +# Calculates region of interest based on face landmarks, so that can be reused +# for subsequent image. +node { + calculator: "FaceLandmarkLandmarksToRoi" + input_stream: "LANDMARKS:face_landmarks" + input_stream: "IMAGE_SIZE:landmarks_loop_image_size" + output_stream: "ROI:face_rect_from_landmarks" +} + +# Collects a set of landmarks for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of landmarks at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedLandmarkListVectorCalculator" + input_stream: "ITEM:face_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:multi_face_landmarks" +} + +# Collects a NormalizedRect for each face into a vector. Upon receiving the +# BATCH_END timestamp, outputs the vector of NormalizedRect at the BATCH_END +# timestamp. +node { + calculator: "EndLoopNormalizedRectCalculator" + input_stream: "ITEM:face_rect_from_landmarks" + input_stream: "BATCH_END:landmarks_loop_end_timestamp" + output_stream: "ITERABLE:face_rects_from_landmarks" +} + +# Caches face rects calculated from landmarks, and upon the arrival of the next +# input image, sends out the cached rects with timestamps replaced by that of +# the input image, essentially generating a packet that carries the previous +# face rects. Note that upon the arrival of the very first input image, a +# timestamp bound update occurs to jump start the feedback loop. +node { + calculator: "PreviousLoopbackCalculator" + input_stream: "MAIN:image" + input_stream: "LOOP:face_rects_from_landmarks" + input_stream_info: { + tag_index: "LOOP" + back_edge: true + } + output_stream: "PREV_LOOP:prev_face_rects_from_landmarks" +} diff --git a/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt new file mode 100644 index 0000000000..d8537fd823 --- /dev/null +++ b/mediapipe/modules/face_landmark/face_landmark_side_model_cpu.pbtxt @@ -0,0 +1,143 @@ +# MediaPipe graph to detect/predict face landmarks. 
(CPU input, and inference is +# executed on CPU.) +# +# It is required that "face_landmark.tflite" is available at +# "mediapipe/modules/face_landmark/face_landmark.tflite" +# path during execution. +# +# EXAMPLE: +# node { +# calculator: "FaceLandmarkCpu" +# input_stream: "IMAGE:image" +# input_stream: "ROI:face_roi" +# input_side_packet: "MODEL:face_landmark_model" +# output_stream: "LANDMARKS:face_landmarks" +# } + +type: "FaceLandmarkCpu" + +# CPU image. (ImageFrame) +input_stream: "IMAGE:image" +# ROI (region of interest) within the given image where a face is located. +# (NormalizedRect) +input_stream: "ROI:roi" + +# TfLite model to detect face landmarks. +# (std::unique_ptr>) +# NOTE: mediapipe/modules/face_landmark/face_landmark.tflite model +# only, can be passed here, otherwise - results are undefined. +input_side_packet: "MODEL:face_landmark_model" + + +# 468 face landmarks within the given ROI. (NormalizedLandmarkList) +# NOTE: if a face is not present within the given ROI, for this particular +# timestamp there will not be an output packet in the LANDMARKS stream. However, +# the MediaPipe framework will internally inform the downstream calculators of +# the absence of this packet so that they don't wait for it unnecessarily. +output_stream: "LANDMARKS:face_landmarks" + +# Transforms the input image into a 192x192 tensor. +node: { + calculator: "ImageToTensorCalculator" + input_stream: "IMAGE:image" + input_stream: "NORM_RECT:roi" + output_stream: "TENSORS:input_tensors" + options: { + [mediapipe.ImageToTensorCalculatorOptions.ext] { + output_tensor_width: 192 + output_tensor_height: 192 + output_tensor_float_range { + min: 0.0 + max: 1.0 + } + } + } +} + +# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a +# vector of tensors representing, for instance, detection boxes/keypoints and +# scores. +node { + calculator: "InferenceCalculator" + input_stream: "TENSORS:input_tensors" + output_stream: "TENSORS:output_tensors" + input_side_packet: "MODEL:face_landmark_model" + options { + [mediapipe.InferenceCalculatorOptions.ext] { + delegate { tflite {} } + } + } +} + +# Splits a vector of tensors into multiple vectors. +node { + calculator: "SplitTensorVectorCalculator" + input_stream: "output_tensors" + output_stream: "landmark_tensors" + output_stream: "face_flag_tensor" + options: { + [mediapipe.SplitVectorCalculatorOptions.ext] { + ranges: { begin: 0 end: 1 } + ranges: { begin: 1 end: 2 } + } + } +} + +# Converts the face-flag tensor into a float that represents the confidence +# score of face presence. +node { + calculator: "TensorsToFloatsCalculator" + input_stream: "TENSORS:face_flag_tensor" + output_stream: "FLOAT:face_presence_score" + options { + [mediapipe.TensorsToFloatsCalculatorOptions.ext] { + activation: SIGMOID + } + } +} + +# Applies a threshold to the confidence score to determine whether a face is +# present. +node { + calculator: "ThresholdingCalculator" + input_stream: "FLOAT:face_presence_score" + output_stream: "FLAG:face_presence" + options: { + [mediapipe.ThresholdingCalculatorOptions.ext] { + threshold: 0.5 + } + } +} + +# Drop landmarks tensors if face is not present. +node { + calculator: "GateCalculator" + input_stream: "landmark_tensors" + input_stream: "ALLOW:face_presence" + output_stream: "ensured_landmark_tensors" +} + +# Decodes the landmark tensors into a vector of landmarks, where the landmark +# coordinates are normalized by the size of the input image to the model. 
+node { + calculator: "TensorsToLandmarksCalculator" + input_stream: "TENSORS:ensured_landmark_tensors" + output_stream: "NORM_LANDMARKS:landmarks" + options: { + [mediapipe.TensorsToLandmarksCalculatorOptions.ext] { + num_landmarks: 468 + input_image_width: 192 + input_image_height: 192 + } + } +} + +# Projects the landmarks from the cropped face image to the corresponding +# locations on the full image before cropping (input to the graph). +node { + calculator: "LandmarkProjectionCalculator" + input_stream: "NORM_LANDMARKS:landmarks" + input_stream: "NORM_RECT:roi" + output_stream: "NORM_LANDMARKS:face_landmarks" +} From 08448ad5efbede67e1811b335c47596d4ff4802a Mon Sep 17 00:00:00 2001 From: dmaletskiy Date: Fri, 24 Sep 2021 15:00:57 +0300 Subject: [PATCH 3/8] fix: resolved merge conflicts --- .../apps/facemeshgpu_shared/face_mesh_lib.cpp | 17 ++++++++--------- .../apps/facemeshgpu_shared/face_mesh_lib.h | 5 +++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp index ab8e6d11de..4690a402c7 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp @@ -12,7 +12,9 @@ MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, } absl::Status -MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, { +MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, + const char *face_detection_model_path, + const char *face_landmark_model_path) { numFaces = std::max(numFaces, 1); if (face_detection_model_path == nullptr) { @@ -46,7 +48,7 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, { LOG(INFO) << "Start running the calculator graph."; - ASSIGN_OR_RETURN(mediapipe::OutputStreamPolle landmarks_poller, + ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller, graph.AddOutputStreamPoller(kOutputStream_landmarks)); ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller, graph.AddOutputStreamPoller(kOutputStream_faceCount)); @@ -170,14 +172,13 @@ void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame, } } -absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( - cv::Point2f **multi_face_landmarks) { +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks) { if (face_landmarks_packet.IsEmpty()) { return absl::CancelledError("Face landmarks packet is empty."); } - auto &face_landmarks = face_landmarks_packet + auto &face_landmarks = face_landmarks_packet.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); const auto image_width_f = static_cast(image_width); const auto image_height_f = static_cast(image_height); @@ -203,14 +204,12 @@ absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( return absl::OkStatus(); } -absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( - cv::Point3f **multi_face_landmarks) { - +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks) { if (face_landmarks_packet.IsEmpty()) { return absl::CancelledError("Face landmarks packet is empty."); } - auto &face_landmarks = face_landmarks_packet + auto &face_landmarks = face_landmarks_packet.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); const auto image_width_f = static_cast(image_width); const auto image_height_f = 
static_cast(image_height); diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h index 32f64492ec..6c7a2d8d32 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h @@ -42,8 +42,9 @@ class MPFaceMeshDetector { absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, cv::Rect *multi_face_bounding_boxes, int *numFaces); - - + absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks); + absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks); + static constexpr auto kInputStream = "input_video"; static constexpr auto kOutputStream_landmarks = "multi_face_landmarks"; static constexpr auto kOutputStream_faceCount = "face_count"; From 9551efc7a512485d4ececf2426b85d23b5ac2a5a Mon Sep 17 00:00:00 2001 From: Pavlo-Ivan Mykhalevych Date: Tue, 14 Dec 2021 12:07:14 +0200 Subject: [PATCH 4/8] Change files to work with attention --- .../mediapipe/apps/facemeshgpu_shared/BUILD | 3 +- .../apps/facemeshgpu_shared/MainActivity.java | 93 ------- .../apps/facemeshgpu_shared/face_mesh_lib.cpp | 239 ++++++++++-------- .../apps/facemeshgpu_shared/face_mesh_lib.h | 132 +++++----- .../desktop/face_mesh_dll/face_mesh_lib.h | 2 +- 5 files changed, 204 insertions(+), 265 deletions(-) delete mode 100644 mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD index 859a830183..5c491a906b 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD @@ -42,6 +42,7 @@ cc_binary( "//mediapipe/framework/formats:image_frame", "//mediapipe/framework/formats:image_frame_opencv", "//mediapipe/framework/formats:landmark_cc_proto", + "//mediapipe/framework/formats:rect_cc_proto", "//mediapipe/framework/port:file_helpers", "//mediapipe/framework/port:opencv_highgui", "//mediapipe/framework/port:opencv_imgproc", @@ -78,7 +79,7 @@ android_binary( manifest_values = { "applicationId": "com.google.mediapipe.apps.facemeshgpu", "appName": "Face Mesh", - "mainActivity": ".MainActivity", + "mainActivity": "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu:.MainActivity", "cameraFacingFront": "True", "binaryGraphName": "face_mesh_mobile_gpu.binarypb", "inputVideoStreamName": "input_video", diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java deleted file mode 100644 index 82c1f44789..0000000000 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/MainActivity.java +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 The MediaPipe Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package com.google.mediapipe.apps.facemeshgpu; - -import android.os.Bundle; -import android.util.Log; -import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmark; -import com.google.mediapipe.formats.proto.LandmarkProto.NormalizedLandmarkList; -import com.google.mediapipe.framework.AndroidPacketCreator; -import com.google.mediapipe.framework.Packet; -import com.google.mediapipe.framework.PacketGetter; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** Main activity of MediaPipe face mesh app. */ -public class MainActivity extends com.google.mediapipe.apps.basic.MainActivity { - private static final String TAG = "MainActivity"; - - private static final String INPUT_NUM_FACES_SIDE_PACKET_NAME = "num_faces"; - private static final String OUTPUT_LANDMARKS_STREAM_NAME = "multi_face_landmarks"; - // Max number of faces to detect/process. - private static final int NUM_FACES = 1; - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - - AndroidPacketCreator packetCreator = processor.getPacketCreator(); - Map inputSidePackets = new HashMap<>(); - inputSidePackets.put(INPUT_NUM_FACES_SIDE_PACKET_NAME, packetCreator.createInt32(NUM_FACES)); - processor.setInputSidePackets(inputSidePackets); - - // To show verbose logging, run: - // adb shell setprop log.tag.MainActivity VERBOSE - if (Log.isLoggable(TAG, Log.VERBOSE)) { - processor.addPacketCallback( - OUTPUT_LANDMARKS_STREAM_NAME, - (packet) -> { - Log.v(TAG, "Received multi face landmarks packet."); - List multiFaceLandmarks = - PacketGetter.getProtoVector(packet, NormalizedLandmarkList.parser()); - Log.v( - TAG, - "[TS:" - + packet.getTimestamp() - + "] " - + getMultiFaceLandmarksDebugString(multiFaceLandmarks)); - }); - } - } - - private static String getMultiFaceLandmarksDebugString( - List multiFaceLandmarks) { - if (multiFaceLandmarks.isEmpty()) { - return "No face landmarks"; - } - String multiFaceLandmarksStr = "Number of faces detected: " + multiFaceLandmarks.size() + "\n"; - int faceIndex = 0; - for (NormalizedLandmarkList landmarks : multiFaceLandmarks) { - multiFaceLandmarksStr += - "\t#Face landmarks for face[" + faceIndex + "]: " + landmarks.getLandmarkCount() + "\n"; - int landmarkIndex = 0; - for (NormalizedLandmark landmark : landmarks.getLandmarkList()) { - multiFaceLandmarksStr += - "\t\tLandmark [" - + landmarkIndex - + "]: (" - + landmark.getX() - + ", " - + landmark.getY() - + ", " - + landmark.getZ() - + ")\n"; - ++landmarkIndex; - } - ++faceIndex; - } - return multiFaceLandmarksStr; - } -} diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp index 4690a402c7..a1cc1d8930 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp @@ -1,47 +1,55 @@ #include "face_mesh_lib.h" +int 
MPFaceMeshDetector::kLandmarksNum = 468; + MPFaceMeshDetector::MPFaceMeshDetector(int numFaces, - const char *face_detection_model_path, - const char *face_landmark_model_path) { - const auto status = InitFaceMeshDetector(numFaces, face_detection_model_path, - face_landmark_model_path); + bool with_attention, + const char* face_detection_model_path, + const char* face_landmark_model_path, + const char* face_landmark_with_attention_model_path) { + const auto status = InitFaceMeshDetector( + numFaces, + with_attention, + face_detection_model_path, + face_landmark_model_path, + face_landmark_with_attention_model_path); if (!status.ok()) { LOG(INFO) << "Failed constructing FaceMeshDetector."; LOG(INFO) << status.message(); } + if (with_attention) { + kLandmarksNum = kLandmarksNumWithAttention; + } } absl::Status MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, - const char *face_detection_model_path, - const char *face_landmark_model_path) { + bool with_attention, + const char* face_detection_model_path, + const char* face_landmark_model_path, + const char* face_landmark_with_attention_model_path) { numFaces = std::max(numFaces, 1); - if (face_detection_model_path == nullptr) { - face_detection_model_path = - "mediapipe/modules/face_detection/face_detection_short_range.tflite"; - } - - if (face_landmark_model_path == nullptr) { - face_landmark_model_path = - "mediapipe/modules/face_landmark/face_landmark.tflite"; + if (with_attention) { + face_landmark_model_path = face_landmark_with_attention_model_path; } // Prepare graph config. auto preparedGraphConfig = absl::StrReplaceAll( - graphConfig, {{"$numFaces", std::to_string(numFaces)}}); + graphConfig, { {"$numFaces", std::to_string(numFaces)} }); + preparedGraphConfig = absl::StrReplaceAll(preparedGraphConfig, { {"$with_attention", with_attention ? 
"true" : "false"} }); preparedGraphConfig = absl::StrReplaceAll( - preparedGraphConfig, - {{"$faceDetectionModelPath", face_detection_model_path}}); + preparedGraphConfig, + { {"$faceDetectionModelPath", face_detection_model_path} }); preparedGraphConfig = absl::StrReplaceAll( - preparedGraphConfig, - {{"$faceLandmarkModelPath", face_landmark_model_path}}); + preparedGraphConfig, + { {"$faceLandmarkModelPath", face_landmark_model_path} }); LOG(INFO) << "Get calculator graph config contents: " << preparedGraphConfig; mediapipe::CalculatorGraphConfig config = - mediapipe::ParseTextProtoOrDie( - preparedGraphConfig); + mediapipe::ParseTextProtoOrDie( + preparedGraphConfig); LOG(INFO) << "Initialize the calculator graph."; MP_RETURN_IF_ERROR(graph.Initialize(config)); @@ -49,19 +57,20 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, LOG(INFO) << "Start running the calculator graph."; ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller landmarks_poller, - graph.AddOutputStreamPoller(kOutputStream_landmarks)); + graph.AddOutputStreamPoller(kOutputStream_landmarks)); ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_count_poller, - graph.AddOutputStreamPoller(kOutputStream_faceCount)); - ASSIGN_OR_RETURN(mediapipe::OutputStreamPoller face_rects_from_landmarks_poller, - graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks)); + graph.AddOutputStreamPoller(kOutputStream_faceCount)); + ASSIGN_OR_RETURN( + mediapipe::OutputStreamPoller face_rects_from_landmarks_poller, + graph.AddOutputStreamPoller(kOutputStream_face_rects_from_landmarks)); landmarks_poller_ptr = std::make_unique( - std::move(landmarks_poller)); + std::move(landmarks_poller)); face_count_poller_ptr = std::make_unique( - std::move(face_count_poller)); + std::move(face_count_poller)); face_rects_from_landmarks_poller_ptr = - std::make_unique( - std::move(face_rects_from_landmarks_poller)); + std::make_unique( + std::move(face_rects_from_landmarks_poller)); MP_RETURN_IF_ERROR(graph.StartRun({})); @@ -71,13 +80,13 @@ MPFaceMeshDetector::InitFaceMeshDetector(int numFaces, } absl::Status -MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, - int *numFaces) { +MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, + int* numFaces) { if (!numFaces || !multi_face_bounding_boxes) { return absl::InvalidArgumentError( - "MPFaceMeshDetector::DetectFacesWithStatus requires notnull pointer to " - "save results data."); + "MPFaceMeshDetector::DetectFacesWithStatus requires notnull pointer to " + "save results data."); } // Reset face counts. @@ -86,26 +95,27 @@ MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, // Wrap Mat into an ImageFrame. auto input_frame = absl::make_unique( - mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows, - mediapipe::ImageFrame::kDefaultAlignmentBoundary); + mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows, + mediapipe::ImageFrame::kDefaultAlignmentBoundary); cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get()); camera_frame.copyTo(input_frame_mat); // Send image packet into the graph. 
- static size_t timestamp = 0; + size_t frame_timestamp_us = static_cast(cv::getTickCount()) / + static_cast(cv::getTickFrequency()) * 1e6; MP_RETURN_IF_ERROR(graph.AddPacketToInputStream( - kInputStream, mediapipe::Adopt(input_frame.release()) - .At(mediapipe::Timestamp(timestamp++)))); + kInputStream, mediapipe::Adopt(input_frame.release()) + .At(mediapipe::Timestamp(frame_timestamp_us)))); // Get face count. mediapipe::Packet face_count_packet; if (!face_count_poller_ptr || !face_count_poller_ptr->Next(&face_count_packet)) { return absl::CancelledError( - "Failed during getting next face_count_packet."); + "Failed during getting next face_count_packet."); } - auto &face_count_val = face_count_packet.Get(); + auto& face_count_val = face_count_packet.Get(); if (face_count_val <= 0) { return absl::OkStatus(); @@ -115,14 +125,14 @@ MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, mediapipe::Packet face_rects_from_landmarks_packet; if (!face_rects_from_landmarks_poller_ptr || !face_rects_from_landmarks_poller_ptr->Next( - &face_rects_from_landmarks_packet)) { + &face_rects_from_landmarks_packet)) { return absl::CancelledError( - "Failed during getting next face_rects_from_landmarks_packet."); + "Failed during getting next face_rects_from_landmarks_packet."); } - auto &face_bounding_boxes = - face_rects_from_landmarks_packet - .Get < ::std::vector < ::mediapipe::NormalizedRect >> (); + auto& face_bounding_boxes = + face_rects_from_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedRect>>(); image_width = camera_frame.cols; image_height = camera_frame.rows; @@ -131,21 +141,21 @@ MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, // Convert vector (center based Rects) to cv::Rect* // (leftTop based Rects). 
- for (int i = 0; i < face_count_val; i) { - const auto &normalized_bounding_box = face_bounding_boxes[i]; - auto &bounding_box = multi_face_bounding_boxes[i]; + for (int i = 0; i < face_count_val; ++i) { + const auto& normalized_bounding_box = face_bounding_boxes[i]; + auto& bounding_box = multi_face_bounding_boxes[i]; const auto width = - static_cast(normalized_bounding_box.width() * image_width_f); + static_cast(normalized_bounding_box.width() * image_width_f); const auto height = - static_cast(normalized_bounding_box.height() * image_height_f); + static_cast(normalized_bounding_box.height() * image_height_f); bounding_box.x = - static_cast(normalized_bounding_box.x_center() * image_width_f) - - (width >> 1); + static_cast(normalized_bounding_box.x_center() * image_width_f) - + (width >> 1); bounding_box.y = - static_cast(normalized_bounding_box.y_center() * image_height_f) - - (height >> 1); + static_cast(normalized_bounding_box.y_center() * image_height_f) - + (height >> 1); bounding_box.width = width; bounding_box.height = height; } @@ -162,40 +172,42 @@ MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat &camera_frame, return absl::OkStatus(); } -void MPFaceMeshDetector::DetectFaces(const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, - int *numFaces) { +void MPFaceMeshDetector::DetectFaces(const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, + int* numFaces) { const auto status = - DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces); + DetectFacesWithStatus(camera_frame, multi_face_bounding_boxes, numFaces); if (!status.ok()) { LOG(INFO) << "MPFaceMeshDetector::DetectFaces failed: " << status.message(); } } - -absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks) { +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point2f** multi_face_landmarks) { if (face_landmarks_packet.IsEmpty()) { return absl::CancelledError("Face landmarks packet is empty."); } - auto &face_landmarks = face_landmarks_packet.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + auto& face_landmarks = + face_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); const auto image_width_f = static_cast(image_width); const auto image_height_f = static_cast(image_height); // Convert landmarks to cv::Point2f**. 
- for (int i = 0; i < face_count; i) { - const auto &normalizedLandmarkList = face_landmarks[i]; + for (int i = 0; i < face_count; ++i) { + const auto& normalizedLandmarkList = face_landmarks[i]; const auto landmarks_num = normalizedLandmarkList.landmark_size(); if (landmarks_num != kLandmarksNum) { return absl::CancelledError("Detected unexpected landmarks number."); } - auto &face_landmarks = multi_face_landmarks[i]; + auto& face_landmarks = multi_face_landmarks[i]; - for (int j = 0; j < landmarks_num; j) { - const auto &landmark = normalizedLandmarkList.landmark(j); + for (int j = 0; j < landmarks_num; ++j) { + const auto& landmark = normalizedLandmarkList.landmark(j); face_landmarks[j].x = landmark.x() * image_width_f; face_landmarks[j].y = landmark.y() * image_height_f; } @@ -204,29 +216,33 @@ absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point2f **multi_f return absl::OkStatus(); } -absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks) { +absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus( + cv::Point3f** multi_face_landmarks) { + if (face_landmarks_packet.IsEmpty()) { return absl::CancelledError("Face landmarks packet is empty."); } - auto &face_landmarks = face_landmarks_packet.Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); + auto& face_landmarks = + face_landmarks_packet + .Get<::std::vector<::mediapipe::NormalizedLandmarkList>>(); const auto image_width_f = static_cast(image_width); const auto image_height_f = static_cast(image_height); // Convert landmarks to cv::Point3f**. - for (int i = 0; i < face_count; i) { - const auto &normalized_landmark_list = face_landmarks[i]; + for (int i = 0; i < face_count; ++i) { + const auto& normalized_landmark_list = face_landmarks[i]; const auto landmarks_num = normalized_landmark_list.landmark_size(); if (landmarks_num != kLandmarksNum) { return absl::CancelledError("Detected unexpected landmarks number."); } - auto &face_landmarks = multi_face_landmarks[i]; + auto& face_landmarks = multi_face_landmarks[i]; - for (int j = 0; j < landmarks_num; j) { - const auto &landmark = normalized_landmark_list.landmark(j); + for (int j = 0; j < landmarks_num; ++j) { + const auto& landmark = normalized_landmark_list.landmark(j); face_landmarks[j].x = landmark.x() * image_width_f; face_landmarks[j].y = landmark.y() * image_height_f; face_landmarks[j].z = landmark.z(); @@ -236,61 +252,60 @@ absl::Status MPFaceMeshDetector::DetectLandmarksWithStatus(cv::Point3f **multi_f return absl::OkStatus(); } -void MPFaceMeshDetector::DetectLandmarks(cv::Point2f **multi_face_landmarks, - int *numFaces) { +void MPFaceMeshDetector::DetectLandmarks(cv::Point2f** multi_face_landmarks, + int* numFaces) { *numFaces = 0; const auto status = DetectLandmarksWithStatus(multi_face_landmarks); if (!status.ok()) { LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " - << status.message(); + << status.message(); } *numFaces = face_count; } -void MPFaceMeshDetector::DetectLandmarks(cv::Point3f **multi_face_landmarks, - int *numFaces) { +void MPFaceMeshDetector::DetectLandmarks(cv::Point3f** multi_face_landmarks, + int* numFaces) { *numFaces = 0; const auto status = DetectLandmarksWithStatus(multi_face_landmarks); if (!status.ok()) { LOG(INFO) << "MPFaceMeshDetector::DetectLandmarks failed: " - << status.message(); + << status.message(); } *numFaces = face_count; } extern "C" { -MPFaceMeshDetector * -MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, - const char 
*face_landmark_model_path) { - return new MPFaceMeshDetector(numFaces, face_detection_model_path, - face_landmark_model_path); -} + MPFaceMeshDetector* MPFaceMeshDetectorConstruct(int numFaces, + bool with_attention, + const char* face_detection_model_path, + const char* face_landmark_model_path, + const char* face_landmark_model_with_attention_path) { + return new MPFaceMeshDetector(numFaces, with_attention, face_detection_model_path, + face_landmark_model_path, face_landmark_model_with_attention_path); + } -void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector) { - delete detector; -} + void MPFaceMeshDetectorDestruct(MPFaceMeshDetector* detector) { + delete detector; + } -void MPFaceMeshDetectorDetectFaces( - MPFaceMeshDetector *detector, const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, int *numFaces) { - detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces); -} -void -MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, - cv::Point2f **multi_face_landmarks, - int *numFaces) { - detector->DetectLandmarks(multi_face_landmarks, numFaces); -} -void -MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, - cv::Point3f **multi_face_landmarks, - int *numFaces) { - detector->DetectLandmarks(multi_face_landmarks, numFaces); -} + void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector* detector, const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, int* numFaces) { + detector->DetectFaces(camera_frame, multi_face_bounding_boxes, numFaces); + } + void MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector* detector, + cv::Point2f** multi_face_landmarks, + int* numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); + } + void MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector* detector, + cv::Point3f** multi_face_landmarks, + int* numFaces) { + detector->DetectLandmarks(multi_face_landmarks, numFaces); + } -int MPFaceMeshDetectorLandmarksNum() { - return MPFaceMeshDetector::kLandmarksNum; -} + const int MPFaceMeshDetectorLandmarksNum = + MPFaceMeshDetector::kLandmarksNum; } const std::string MPFaceMeshDetector::graphConfig = R"pb( @@ -324,10 +339,12 @@ node { # Defines side packets for further use in the graph. 
node { calculator: "ConstantSidePacketCalculator" - output_side_packet: "PACKET:num_faces" + output_side_packet: "PACKET:0:num_faces" + output_side_packet: "PACKET:1:with_attention" node_options: { [type.googleapis.com/mediapipe.ConstantSidePacketCalculatorOptions]: { packet { int_value: $numFaces } + packet { bool_value: $with_attention } } } } @@ -367,6 +384,7 @@ node { input_side_packet: "MODEL_BLOB:face_detection_model_blob" output_side_packet: "MODEL:face_detection_model" } + node { calculator: "TfLiteModelCalculator" input_side_packet: "MODEL_BLOB:face_landmark_model_blob" @@ -381,6 +399,7 @@ node { input_side_packet: "NUM_FACES:num_faces" input_side_packet: "MODEL:0:face_detection_model" input_side_packet: "MODEL:1:face_landmark_model" + input_side_packet: "WITH_ATTENTION:with_attention" output_stream: "LANDMARKS:multi_face_landmarks" output_stream: "ROIS_FROM_LANDMARKS:face_rects_from_landmarks" output_stream: "DETECTIONS:face_detections" diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h index 6c7a2d8d32..ed0a8a05c7 100644 --- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h +++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "absl/flags/flag.h" #include "absl/flags/parse.h" @@ -23,70 +24,81 @@ #include "mediapipe/framework/port/status.h" class MPFaceMeshDetector { - public: - MPFaceMeshDetector(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path); - - void DetectFaces(const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, int *numFaces); - - void DetectLandmarks(cv::Point2f **multi_face_landmarks, int *numFaces); - void DetectLandmarks(cv::Point3f **multi_face_landmarks, int *numFaces); - - static constexpr auto kLandmarksNum = 468; - - private: - absl::Status InitFaceMeshDetector(int numFaces, - const char *face_detection_model_path, - const char *face_landmark_model_path); - absl::Status DetectFacesWithStatus(const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, - int *numFaces); - absl::Status DetectLandmarksWithStatus(cv::Point2f **multi_face_landmarks); - absl::Status DetectLandmarksWithStatus(cv::Point3f **multi_face_landmarks); - - static constexpr auto kInputStream = "input_video"; - static constexpr auto kOutputStream_landmarks = "multi_face_landmarks"; - static constexpr auto kOutputStream_faceCount = "face_count"; - static constexpr auto kOutputStream_face_rects_from_landmarks = "face_rects_from_landmarks"; - - static const std::string graphConfig; - - mediapipe::CalculatorGraph graph; - - std::unique_ptr landmarks_poller_ptr; - std::unique_ptr face_count_poller_ptr; - std::unique_ptr face_rects_from_landmarks_poller_ptr; - - int face_count; - int image_width; - int image_height; - mediapipe::Packet face_landmarks_packet; - }; +public: + MPFaceMeshDetector(int numFaces, + bool with_attention, + const char* face_detection_model_path, + const char* face_landmark_model_path, + const char* face_landmark_model_with_attention_path); + + void DetectFaces(const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, int* numFaces); + + void DetectLandmarks(cv::Point2f** multi_face_landmarks, int* numFaces); + void DetectLandmarks(cv::Point3f** multi_face_landmarks, int* numFaces); + + 
static constexpr auto kLandmarksNumWithoutAttention = 468; + static constexpr auto kLandmarksNumWithAttention = 478; + static int kLandmarksNum; + +private: + absl::Status InitFaceMeshDetector(int numFaces, + bool with_attention, + const char* face_detection_model_path, + const char* face_landmark_model_path, + const char* face_landmark_model_with_attention_path); + absl::Status DetectFacesWithStatus(const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, + int* numFaces); + + absl::Status DetectLandmarksWithStatus(cv::Point2f** multi_face_landmarks); + absl::Status DetectLandmarksWithStatus(cv::Point3f** multi_face_landmarks); + + static constexpr auto kInputStream = "input_video"; + static constexpr auto kOutputStream_landmarks = "multi_face_landmarks"; + static constexpr auto kOutputStream_faceCount = "face_count"; + static constexpr auto kOutputStream_face_rects_from_landmarks = + "face_rects_from_landmarks"; + + static const std::string graphConfig; + + mediapipe::CalculatorGraph graph; + + std::unique_ptr landmarks_poller_ptr; + std::unique_ptr face_count_poller_ptr; + std::unique_ptr + face_rects_from_landmarks_poller_ptr; + + int face_count; + int image_width; + int image_height; + mediapipe::Packet face_landmarks_packet; +}; #ifdef __cplusplus extern "C" { #endif -MPFaceMeshDetector * -MPFaceMeshDetectorConstruct(int numFaces, const char *face_detection_model_path, - const char *face_landmark_model_path); - -void MPFaceMeshDetectorDestruct(MPFaceMeshDetector *detector); - -void MPFaceMeshDetectorDetectFaces( - MPFaceMeshDetector *detector, const cv::Mat &camera_frame, - cv::Rect *multi_face_bounding_boxes, int *numFaces); - -void -MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector *detector, - cv::Point2f **multi_face_landmarks, - int *numFaces); -void -MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector *detector, - cv::Point3f **multi_face_landmarks, - int *numFaces); - -int MPFaceMeshDetectorLandmarksNum(); + + MPFaceMeshDetector* MPFaceMeshDetectorConstruct(int numFaces, + bool with_attention = true, + const char* face_detection_model_path = "mediapipe/modules/face_detection/face_detection_short_range.tflite", + const char* face_landmark_model_path = "mediapipe/modules/face_landmark/face_landmark.tflite", + const char* face_landmark_model_with_attention_path = "mediapipe/modules/face_landmark/face_landmark_with_attention.tflite"); + + void MPFaceMeshDetectorDestruct(MPFaceMeshDetector* detector); + + void MPFaceMeshDetectorDetectFaces( + MPFaceMeshDetector* detector, const cv::Mat& camera_frame, + cv::Rect* multi_face_bounding_boxes, int* numFaces); + + void MPFaceMeshDetectorDetect2DLandmarks(MPFaceMeshDetector* detector, + cv::Point2f** multi_face_landmarks, + int* numFaces); + void MPFaceMeshDetectorDetect3DLandmarks(MPFaceMeshDetector* detector, + cv::Point3f** multi_face_landmarks, + int* numFaces); + + extern const int MPFaceMeshDetectorLandmarksNum; #ifdef __cplusplus }; diff --git a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h index 9d78b2be4c..9f49579c44 100644 --- a/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h +++ b/mediapipe/examples/desktop/face_mesh_dll/face_mesh_lib.h @@ -35,7 +35,7 @@ class MPFaceMeshDetector { bool with_attention, const char *face_detection_model_path, const char *face_landmark_model_path, - const char* face_landmark_model_with_attention_path); + const char *face_landmark_model_with_attention_path); void DetectFaces(const cv::Mat &camera_frame, 
                    cv::Rect *multi_face_bounding_boxes, int *numFaces);

From 8eae4ce07232091efcbd84ed3c9df5a88b5c529e Mon Sep 17 00:00:00 2001
From: Pavlo-Ivan Mykhalevych
Date: Tue, 14 Dec 2021 12:49:02 +0200
Subject: [PATCH 5/8] Update

---
 mediapipe/modules/face_landmark/BUILD | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/mediapipe/modules/face_landmark/BUILD b/mediapipe/modules/face_landmark/BUILD
index 9d02cb9eef..de16605b88 100644
--- a/mediapipe/modules/face_landmark/BUILD
+++ b/mediapipe/modules/face_landmark/BUILD
@@ -62,22 +62,6 @@ mediapipe_simple_subgraph(
     ],
 )
 
-mediapipe_simple_subgraph(
-    name = "face_landmark_side_model_cpu",
-    graph = "face_landmark_side_model_cpu.pbtxt",
-    register_as = "FaceLandmarkSideModelCpu",
-    deps = [
-        "//mediapipe/calculators/core:gate_calculator",
-        "//mediapipe/calculators/core:split_vector_calculator",
-        "//mediapipe/calculators/tensor:image_to_tensor_calculator",
-        "//mediapipe/calculators/tensor:inference_calculator",
-        "//mediapipe/calculators/tensor:tensors_to_floats_calculator",
-        "//mediapipe/calculators/tensor:tensors_to_landmarks_calculator",
-        "//mediapipe/calculators/util:landmark_projection_calculator",
-        "//mediapipe/calculators/util:thresholding_calculator",
-    ],
-)
-
 mediapipe_simple_subgraph(
     name = "face_landmark_gpu",
     graph = "face_landmark_gpu.pbtxt",

From c527ff00c36fa7e63e56ac23095ea66afa12237a Mon Sep 17 00:00:00 2001
From: pMykhalevych
Date: Tue, 14 Dec 2021 14:49:20 +0200
Subject: [PATCH 6/8] Add Android .so

---
 WORKSPACE                                                        | 4 +++-
 .../java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD      | 4 ++--
 .../google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h     | 3 +--
 third_party/opencv_linux.BUILD                                   | 1 +
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index e3af65abe2..f7a84f0aa9 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -188,7 +188,7 @@ http_archive(
 new_local_repository(
     name = "linux_opencv",
     build_file = "@//third_party:opencv_linux.BUILD",
-    path = "/usr",
+    path = "/usr/local",
 )
 
 new_local_repository(
@@ -252,10 +252,12 @@ http_archive(
 # You may run setup_android.sh to install Android SDK and NDK.
 android_ndk_repository(
     name = "androidndk",
+    path = "/home/pavlik/Android/Ndk/android-ndk-r19c",
 )
 
 android_sdk_repository(
     name = "androidsdk",
+    path = "/home/pavlik/Android/Sdk",
 )
 
 # iOS basic build deps.
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD
index 5c491a906b..a7bc981ec7 100644
--- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/BUILD
@@ -42,7 +42,7 @@ cc_binary(
         "//mediapipe/framework/formats:image_frame",
         "//mediapipe/framework/formats:image_frame_opencv",
         "//mediapipe/framework/formats:landmark_cc_proto",
-        "//mediapipe/framework/formats:rect_cc_proto",
+        "//mediapipe/framework/formats:rect_cc_proto",
         "//mediapipe/framework/port:file_helpers",
         "//mediapipe/framework/port:opencv_highgui",
         "//mediapipe/framework/port:opencv_imgproc",
@@ -79,7 +79,7 @@ android_binary(
     manifest_values = {
         "applicationId": "com.google.mediapipe.apps.facemeshgpu",
         "appName": "Face Mesh",
-        "mainActivity": "//mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu:.MainActivity",
+        "mainActivity": ".MainActivity",
         "cameraFacingFront": "True",
         "binaryGraphName": "face_mesh_mobile_gpu.binarypb",
         "inputVideoStreamName": "input_video",
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h
index ed0a8a05c7..0a33ed711c 100644
--- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.h
@@ -4,7 +4,6 @@
 #include
 #include
 #include
-#include
 
 #include "absl/flags/flag.h"
 #include "absl/flags/parse.h"
@@ -103,4 +102,4 @@ extern "C" {
 #ifdef __cplusplus
 };
 #endif
-#endif
\ No newline at end of file
+#endif
diff --git a/third_party/opencv_linux.BUILD b/third_party/opencv_linux.BUILD
index 844585541e..6ca91a0c96 100644
--- a/third_party/opencv_linux.BUILD
+++ b/third_party/opencv_linux.BUILD
@@ -28,6 +28,7 @@ cc_library(
         #"include/opencv4/",
     ],
     linkopts = [
+        "-L/usr/local/lib",
         "-l:libopencv_core.so",
         "-l:libopencv_calib3d.so",
         "-l:libopencv_features2d.so",

From ffce0df0968d412f70e7cc7345bf7f6c0e16dea1 Mon Sep 17 00:00:00 2001
From: Pavlo-Ivan Mykhalevych
Date: Tue, 14 Dec 2021 16:57:36 +0200
Subject: [PATCH 7/8] Update

---
 WORKSPACE                      | 4 +---
 third_party/opencv_linux.BUILD | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index f7a84f0aa9..e3af65abe2 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -188,7 +188,7 @@ http_archive(
 new_local_repository(
     name = "linux_opencv",
     build_file = "@//third_party:opencv_linux.BUILD",
-    path = "/usr/local",
+    path = "/usr",
 )
 
 new_local_repository(
@@ -252,12 +252,10 @@ http_archive(
 # You may run setup_android.sh to install Android SDK and NDK.
 android_ndk_repository(
     name = "androidndk",
-    path = "/home/pavlik/Android/Ndk/android-ndk-r19c",
 )
 
 android_sdk_repository(
     name = "androidsdk",
-    path = "/home/pavlik/Android/Sdk",
 )
 
 # iOS basic build deps.
diff --git a/third_party/opencv_linux.BUILD b/third_party/opencv_linux.BUILD
index 6ca91a0c96..844585541e 100644
--- a/third_party/opencv_linux.BUILD
+++ b/third_party/opencv_linux.BUILD
@@ -28,7 +28,6 @@ cc_library(
         #"include/opencv4/",
     ],
     linkopts = [
-        "-L/usr/local/lib",
         "-l:libopencv_core.so",
         "-l:libopencv_calib3d.so",
         "-l:libopencv_features2d.so",

From 79274507cb3a6d1b095c97edd99572d632fa9f17 Mon Sep 17 00:00:00 2001
From: pMykhalevych
Date: Wed, 5 Jan 2022 12:14:55 +0200
Subject: [PATCH 8/8] Fix bug with time ticks and change workspace to use
 opencv 3.4.14

---
 WORKSPACE                                                       | 2 +-
 .../mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp         | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/WORKSPACE b/WORKSPACE
index f7a84f0aa9..a79927077a 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -220,7 +220,7 @@ http_archive(
     build_file = "@//third_party:opencv_android.BUILD",
     strip_prefix = "OpenCV-android-sdk",
     type = "zip",
-    url = "https://github.com/opencv/opencv/releases/download/3.4.3/opencv-3.4.3-android-sdk.zip",
+    url = "https://github.com/opencv/opencv/releases/download/3.4.14/opencv-3.4.14-android-sdk.zip",
 )
 
 # After OpenCV 3.2.0, the pre-compiled opencv2.framework has google protobuf symbols, which will
diff --git a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp
index a1cc1d8930..29850d0d5f 100644
--- a/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp
+++ b/mediapipe/examples/android/src/java/com/google/mediapipe/apps/facemeshgpu_shared/face_mesh_lib.cpp
@@ -101,11 +101,12 @@ MPFaceMeshDetector::DetectFacesWithStatus(const cv::Mat& camera_frame,
   camera_frame.copyTo(input_frame_mat);
 
   // Send image packet into the graph.
-  size_t frame_timestamp_us = static_cast<double>(cv::getTickCount()) /
-                              static_cast<double>(cv::getTickFrequency()) * 1e6;
+  //size_t frame_timestamp_us = static_cast<double>(cv::getTickCount()) /
+  //                            static_cast<double>(cv::getTickFrequency()) * 1e6;
+  static size_t frame_timestamp = 0;
   MP_RETURN_IF_ERROR(graph.AddPacketToInputStream(
       kInputStream, mediapipe::Adopt(input_frame.release())
-                        .At(mediapipe::Timestamp(frame_timestamp_us))));
+                        .At(mediapipe::Timestamp(frame_timestamp++))));
 
   // Get face count.
   mediapipe::Packet face_count_packet;
@@ -305,7 +306,7 @@ extern "C" {
 }
 
 const int MPFaceMeshDetectorLandmarksNum =
-    MPFaceMeshDetector::kLandmarksNum;
+    MPFaceMeshDetector::kLandmarksNumWithAttention;
 }
 
 const std::string MPFaceMeshDetector::graphConfig = R"pb(