Commit 1faeaae

Project import generated by Copybara.

GitOrigin-RevId: bbbbcb4f5174dea33525729ede47c770069157cd
MediaPipe Team authored and chuoling committed Oct 18, 2021
1 parent 33d683c commit 1faeaae

Showing 75 changed files with 1,941 additions and 557 deletions.
11 changes: 10 additions & 1 deletion docs/solutions/hands.md
@@ -120,7 +120,7 @@ just 86.22%.
### Hand Landmark Model

After palm detection over the whole image, our subsequent hand landmark
-[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite)
+[model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite)
performs precise keypoint localization of 21 3D hand-knuckle coordinates inside
the detected hand regions via regression, that is, direct coordinate prediction.
The model learns a consistent internal hand pose representation and is robust
@@ -163,6 +163,11 @@ unrelated, images. Default to `false`.

Maximum number of hands to detect. Defaults to `2`.

+#### model_complexity
+
+Complexity of the hand landmark model: `0` or `1`. Landmark accuracy, as well
+as inference latency, generally goes up with the model complexity. Defaults to `1`.
+
#### min_detection_confidence

Minimum confidence value (`[0.0, 1.0]`) from the hand detection model for the
@@ -212,6 +217,7 @@ Supported configuration options:

* [static_image_mode](#static_image_mode)
* [max_num_hands](#max_num_hands)
+* [model_complexity](#model_complexity)
* [min_detection_confidence](#min_detection_confidence)
* [min_tracking_confidence](#min_tracking_confidence)

@@ -260,6 +266,7 @@ with mp_hands.Hands(
# For webcam input:
cap = cv2.VideoCapture(0)
with mp_hands.Hands(
+    model_complexity=0,
min_detection_confidence=0.5,
min_tracking_confidence=0.5) as hands:
while cap.isOpened():
@@ -302,6 +309,7 @@ and a [fun application], and the following usage example.
Supported configuration options:

* [maxNumHands](#max_num_hands)
+* [modelComplexity](#model_complexity)
* [minDetectionConfidence](#min_detection_confidence)
* [minTrackingConfidence](#min_tracking_confidence)

@@ -351,6 +359,7 @@ const hands = new Hands({locateFile: (file) => {
}});
hands.setOptions({
maxNumHands: 2,
+  modelComplexity: 1,
minDetectionConfidence: 0.5,
minTrackingConfidence: 0.5
});
6 changes: 4 additions & 2 deletions docs/solutions/models.md
@@ -58,10 +58,12 @@ one over the other.
[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/palm_detection/palm_detection.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handdetector/1)
* Hand landmark model:
-[TFLite model](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark.tflite),
+[TFLite model (lite)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_lite.tflite),
+[TFLite model (full)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_full.tflite),
[TFLite model (sparse)](https://github.com/google/mediapipe/tree/master/mediapipe/modules/hand_landmark/hand_landmark_sparse.tflite),
[TF.js model](https://tfhub.dev/mediapipe/handskeleton/1)
-* [Model card](https://mediapipe.page.link/handmc), [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)
+* [Model card](https://mediapipe.page.link/handmc),
+  [Model card (sparse)](https://mediapipe.page.link/handmc-sparse)

### [Pose](https://google.github.io/mediapipe/solutions/pose)

3 changes: 2 additions & 1 deletion docs/solutions/pose.md
@@ -125,7 +125,7 @@ hip midpoints.
:----------------------------------------------------------------------------------------------------: |
*Fig 3. Vitruvian man aligned via two virtual keypoints predicted by BlazePose detector in addition to the face bounding box.* |

-### Pose Landmark Model (BlazePose GHUM 3D)
+### Pose Landmark Model (BlazePose [GHUM](https://github.com/google-research/google-research/tree/master/ghum) 3D)

The landmark model in MediaPipe Pose predicts the location of 33 pose landmarks
(see figure below).
@@ -486,6 +486,7 @@ on how to build MediaPipe examples.
[BlazePose: On-device Real-time Body Pose Tracking](https://arxiv.org/abs/2006.10204)
([presentation](https://youtu.be/YPpUOTRn5tA))
* [Models and model cards](./models.md#pose)
* [GHUM & GHUML: Generative 3D Human Shape and Articulated Pose Models](https://github.com/google-research/google-research/tree/master/ghum)
* [Web demo](https://code.mediapipe.dev/codepen/pose)
* [Python Colab](https://mediapipe.page.link/pose_py_colab)

4 changes: 4 additions & 0 deletions mediapipe/calculators/core/BUILD
@@ -531,9 +531,13 @@ cc_test(
":split_vector_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:calculator_runner",
"//mediapipe/framework/api2:node",
"//mediapipe/framework/api2:port",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/status",
"@com_google_absl//absl/types:optional",
],
)

4 changes: 4 additions & 0 deletions mediapipe/calculators/core/begin_loop_calculator.cc
@@ -47,4 +47,8 @@ typedef BeginLoopCalculator<std::vector<std::vector<Matrix>>>
BeginLoopMatrixVectorCalculator;
REGISTER_CALCULATOR(BeginLoopMatrixVectorCalculator);

// A calculator to process std::vector<uint64_t>.
typedef BeginLoopCalculator<std::vector<uint64_t>> BeginLoopUint64tCalculator;
REGISTER_CALCULATOR(BeginLoopUint64tCalculator);

} // namespace mediapipe
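For context, a BeginLoop calculator of this kind is wired into a graph with an ITERABLE input and a per-element ITEM output. The following is a minimal sketch, assuming illustrative stream names and that a matching EndLoop calculator for the element type is registered wherever the loop needs to be closed:

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch only: "id_vector", "id", and "batch_end" are hypothetical stream
// names, not part of the MediaPipe API.
mediapipe::CalculatorGraphConfig MakeLoopHeadConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    input_stream: "id_vector"  # carries std::vector<uint64_t> packets
    node {
      calculator: "BeginLoopUint64tCalculator"
      input_stream: "ITERABLE:id_vector"    # the whole vector at timestamp T
      output_stream: "ITEM:id"              # one uint64_t per loop iteration
      output_stream: "BATCH_END:batch_end"  # timestamp that closes the batch
    }
  )pb");
}
```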
99 changes: 99 additions & 0 deletions mediapipe/calculators/core/mux_calculator_test.cc
@@ -14,7 +14,11 @@

#include <memory>

#include "absl/status/status.h"
#include "absl/types/optional.h"
#include "mediapipe/calculators/core/split_vector_calculator.h"
#include "mediapipe/framework/api2/node.h"
#include "mediapipe/framework/api2/port.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/calculator_runner.h"
#include "mediapipe/framework/port/gtest.h"
@@ -301,4 +305,99 @@ TEST(MuxCalculatorTest, DiscardSkippedInputs_MuxInputStreamHandler) {
}

} // namespace

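// Test helper: forwards VALUE unchanged, and on TS_BOUND_UPDATE sends no
// packet but advances the timestamp bound past the current input timestamp,
// so downstream consumers of that stream observe only a bound update.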
class PassThroughAndTsBoundUpdateNode : public mediapipe::api2::Node {
public:
static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
static constexpr mediapipe::api2::Output<int> kOutValue{"VALUE"};
static constexpr mediapipe::api2::Output<int> kOutTsBoundUpdate{
"TS_BOUND_UPDATE"};
MEDIAPIPE_NODE_CONTRACT(kInValue, kOutValue, kOutTsBoundUpdate);

absl::Status Process(CalculatorContext* cc) override {
kOutValue(cc).Send(kInValue(cc));
kOutTsBoundUpdate(cc).SetNextTimestampBound(
cc->InputTimestamp().NextAllowedInStream());
return absl::OkStatus();
}
};
MEDIAPIPE_REGISTER_NODE(PassThroughAndTsBoundUpdateNode);

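// Test helper: at every TICK, reports whether VALUE carried a packet by
// emitting an absl::optional<int>: the value if present, absl::nullopt if the
// input was empty (i.e. only its timestamp bound advanced).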
class ToOptionalNode : public mediapipe::api2::Node {
public:
static constexpr mediapipe::api2::Input<int> kTick{"TICK"};
static constexpr mediapipe::api2::Input<int> kInValue{"VALUE"};
static constexpr mediapipe::api2::Output<absl::optional<int>> kOutValue{
"OUTPUT"};
MEDIAPIPE_NODE_CONTRACT(kTick, kInValue, kOutValue);

absl::Status Process(CalculatorContext* cc) override {
if (kInValue(cc).IsEmpty()) {
kOutValue(cc).Send(absl::nullopt);
} else {
kOutValue(cc).Send({kInValue(cc).Get()});
}
return absl::OkStatus();
}
};
MEDIAPIPE_REGISTER_NODE(ToOptionalNode);

namespace {

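// SELECT=0 routes MuxCalculator to a stream carrying real packets, while
// SELECT=1 routes it to a stream that receives only timestamp-bound updates.
// The mux must propagate that bound so that ToOptionalNode sees an empty
// input (mapped to nullopt below) instead of stalling at that timestamp.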
TEST(MuxCalculatorTest, HandleTimestampBoundUpdates) {
CalculatorGraphConfig config =
mediapipe::ParseTextProtoOrDie<CalculatorGraphConfig>(
R"pb(
input_stream: "select"
node {
calculator: "PassThroughAndTsBoundUpdateNode"
input_stream: "VALUE:select"
output_stream: "VALUE:select_ps"
output_stream: "TS_BOUND_UPDATE:ts_bound_update"
}
node {
calculator: "MuxCalculator"
input_stream: "INPUT:0:select_ps"
input_stream: "INPUT:1:ts_bound_update"
input_stream: "SELECT:select"
output_stream: "OUTPUT:select_or_ts_bound_update"
}
node {
calculator: "ToOptionalNode"
input_stream: "TICK:select"
input_stream: "VALUE:select_or_ts_bound_update"
output_stream: "OUTPUT:output"
}
)pb");
std::vector<Packet> output_packets;
tool::AddVectorSink("output", &config, &output_packets);

CalculatorGraph graph;
MP_ASSERT_OK(graph.Initialize(config));
MP_ASSERT_OK(graph.StartRun({}));

auto send_value_fn = [&](int value, Timestamp ts) -> absl::Status {
MP_RETURN_IF_ERROR(
graph.AddPacketToInputStream("select", MakePacket<int>(value).At(ts)));
return graph.WaitUntilIdle();
};

MP_ASSERT_OK(send_value_fn(0, Timestamp(1)));
ASSERT_EQ(output_packets.size(), 1);
EXPECT_EQ(output_packets[0].Get<absl::optional<int>>(), 0);

MP_ASSERT_OK(send_value_fn(1, Timestamp(2)));
ASSERT_EQ(output_packets.size(), 2);
EXPECT_EQ(output_packets[1].Get<absl::optional<int>>(), absl::nullopt);

MP_ASSERT_OK(send_value_fn(0, Timestamp(3)));
ASSERT_EQ(output_packets.size(), 3);
EXPECT_EQ(output_packets[2].Get<absl::optional<int>>(), 0);

MP_ASSERT_OK(graph.CloseAllInputStreams());
MP_ASSERT_OK(graph.WaitUntilDone());
}

} // namespace

} // namespace mediapipe
23 changes: 22 additions & 1 deletion mediapipe/calculators/tensor/inference_calculator.proto
@@ -34,7 +34,6 @@ option java_outer_classname = "InferenceCalculatorProto";
// }
// }
// }
-//
message InferenceCalculatorOptions {
extend mediapipe.CalculatorOptions {
optional InferenceCalculatorOptions ext = 336783863;
@@ -69,8 +68,30 @@ message InferenceCalculatorOptions {
// Load pre-compiled serialized binary cache to accelerate init process.
// Only available for OpenCL delegate on Android.
// Kernel caching will only be enabled if this path is set.
+//
+// NOTE: binary cache usage may be skipped if a valid serialized model,
+// specified by "serialized_model_dir", exists.
+//
+// TODO: update to cached_kernel_dir
optional string cached_kernel_path = 2;

+// A directory from which to load, and to which to save, a pre-compiled
+// serialized model used to accelerate the init process.
+//
+// NOTE: available for the OpenCL delegate on Android only when
+// "use_advanced_gpu_api" is set to true and "model_token" is set
+// properly.
+//
+// NOTE: the serialized model takes precedence over the binary cache
+// specified by "cached_kernel_path", which can still be used if the
+// serialized model is invalid or missing.
+optional string serialized_model_dir = 7;
+
+// Unique token identifying the model. Used in conjunction with
+// "serialized_model_dir". It is the caller's responsibility to ensure
+// there is no clash of tokens.
+optional string model_token = 8;

// Encapsulated compilation/runtime tradeoffs.
enum InferenceUsage {
UNSPECIFIED = 0;
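To show how these options fit together, here is a minimal sketch of a node enabling both kernel caching and serialized-model caching. It assumes the calculator's usual `TENSORS` stream tags; the paths, token, and model path are illustrative placeholders:

```cpp
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/port/parse_text_proto.h"

// Sketch: per the comments above, the serialized model under
// serialized_model_dir/model_token takes precedence at init time, and the
// kernel cache remains a fallback if the serialized model is invalid or
// missing.
mediapipe::CalculatorGraphConfig MakeGpuCachedInferenceConfig() {
  return mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(R"pb(
    node {
      calculator: "InferenceCalculator"
      input_stream: "TENSORS:input_tensors"
      output_stream: "TENSORS:output_tensors"
      options {
        [mediapipe.InferenceCalculatorOptions.ext] {
          model_path: "mediapipe/models/my_model.tflite"  # placeholder
          delegate {
            gpu {
              use_advanced_gpu_api: true
              cached_kernel_path: "/data/local/tmp/"
              serialized_model_dir: "/data/local/tmp/serialized"
              model_token: "my_model_v1"  # placeholder token
            }
          }
        }
      }
    }
  )pb");
}
```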
57 changes: 43 additions & 14 deletions mediapipe/calculators/tensor/inference_calculator_gl.cc
@@ -20,6 +20,7 @@
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "mediapipe/calculators/tensor/inference_calculator.h"
#include "mediapipe/framework/deps/file_path.h"
#include "mediapipe/util/tflite/config.h"

#if MEDIAPIPE_TFLITE_GL_INFERENCE
Expand Down Expand Up @@ -49,8 +50,8 @@ class InferenceCalculatorGlImpl
absl::Status Close(CalculatorContext* cc) override;

private:
-  absl::Status ReadKernelsFromFile();
-  absl::Status WriteKernelsToFile();
+  absl::Status ReadGpuCaches();
+  absl::Status SaveGpuCaches();
absl::Status LoadModel(CalculatorContext* cc);
absl::Status LoadDelegate(CalculatorContext* cc);
absl::Status LoadDelegateAndAllocateTensors(CalculatorContext* cc);
@@ -82,6 +83,8 @@ class InferenceCalculatorGlImpl

bool use_kernel_caching_ = false;
std::string cached_kernel_filename_;
+  bool use_serialized_model_ = false;
+  std::string serialized_model_path_;
};

absl::Status InferenceCalculatorGlImpl::UpdateContract(CalculatorContract* cc) {
@@ -114,6 +117,9 @@ absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
tflite_gpu_runner_usage_ = delegate.gpu().usage();
use_kernel_caching_ =
use_advanced_gpu_api_ && delegate.gpu().has_cached_kernel_path();
+  use_serialized_model_ = use_advanced_gpu_api_ &&
+                          delegate.gpu().has_serialized_model_dir() &&
+                          delegate.gpu().has_model_token();
use_gpu_delegate_ = !use_advanced_gpu_api_;

if (use_kernel_caching_) {
@@ -123,6 +129,12 @@ absl::Status InferenceCalculatorGlImpl::Open(CalculatorContext* cc) {
".ker";
#endif // MEDIAPIPE_ANDROID
}
+  if (use_serialized_model_) {
+#ifdef MEDIAPIPE_ANDROID
+    serialized_model_path_ = mediapipe::file::JoinPath(
+        delegate.gpu().serialized_model_dir(), delegate.gpu().model_token());
+#endif  // MEDIAPIPE_ANDROID
+  }

// When use_advanced_gpu_api_, model loading is handled in InitTFLiteGPURunner
// for everything.
@@ -210,7 +222,7 @@ absl::Status InferenceCalculatorGlImpl::Process(CalculatorContext* cc) {
return absl::OkStatus();
}

-absl::Status InferenceCalculatorGlImpl::WriteKernelsToFile() {
+absl::Status InferenceCalculatorGlImpl::SaveGpuCaches() {
#ifdef MEDIAPIPE_ANDROID
if (use_kernel_caching_) {
// Save kernel file.
@@ -220,12 +232,22 @@ absl::Status InferenceCalculatorGlImpl::SaveGpuCaches() {
MP_RETURN_IF_ERROR(
mediapipe::file::SetContents(cached_kernel_filename_, cache_str));
}
+  if (use_serialized_model_) {
+    // Save serialized model file.
+    ASSIGN_OR_RETURN(std::vector<uint8_t> serialized_model_vec,
+                     tflite_gpu_runner_->GetSerializedModel());
+    absl::string_view serialized_model(
+        reinterpret_cast<char*>(serialized_model_vec.data()),
+        serialized_model_vec.size());
+    MP_RETURN_IF_ERROR(
+        mediapipe::file::SetContents(serialized_model_path_, serialized_model));
+  }
#endif // MEDIAPIPE_ANDROID
return absl::OkStatus();
}

absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
-  MP_RETURN_IF_ERROR(WriteKernelsToFile());
+  MP_RETURN_IF_ERROR(SaveGpuCaches());
if (use_gpu_delegate_) {
MP_RETURN_IF_ERROR(gpu_helper_.RunInGlContext([this]() -> Status {
gpu_buffers_in_.clear();
@@ -239,17 +261,24 @@ absl::Status InferenceCalculatorGlImpl::Close(CalculatorContext* cc) {
return absl::OkStatus();
}

-absl::Status InferenceCalculatorGlImpl::ReadKernelsFromFile() {
+absl::Status InferenceCalculatorGlImpl::ReadGpuCaches() {
#ifdef MEDIAPIPE_ANDROID
-  if (use_kernel_caching_) {
+  if (use_kernel_caching_ && File::Exists(cached_kernel_filename_)) {
    // Load pre-compiled kernel file.
-    if (mediapipe::File::Exists(cached_kernel_filename_)) {
-      std::string cache_str;
-      MP_RETURN_IF_ERROR(
-          mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
-      std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
-      tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
-    }
+    std::string cache_str;
+    MP_RETURN_IF_ERROR(
+        mediapipe::file::GetContents(cached_kernel_filename_, &cache_str));
+    std::vector<uint8_t> cache_vec(cache_str.begin(), cache_str.end());
+    tflite_gpu_runner_->SetSerializedBinaryCache(std::move(cache_vec));
}
+  if (use_serialized_model_ && File::Exists(serialized_model_path_)) {
+    // Load serialized model file.
+    std::string serialized_model_str;
+    MP_RETURN_IF_ERROR(
+        file::GetContents(serialized_model_path_, &serialized_model_str));
+    std::vector<uint8_t> serialized_model_vec(serialized_model_str.begin(),
+                                              serialized_model_str.end());
+    tflite_gpu_runner_->SetSerializedModel(std::move(serialized_model_vec));
+  }
#endif // MEDIAPIPE_ANDROID
return absl::OkStatus();
@@ -313,7 +342,7 @@ absl::Status InferenceCalculatorGlImpl::InitTFLiteGPURunner(
tflite_gpu_runner_->GetOutputShapes()[i].c};
}

-  MP_RETURN_IF_ERROR(ReadKernelsFromFile());
+  MP_RETURN_IF_ERROR(ReadGpuCaches());

MP_RETURN_IF_ERROR(tflite_gpu_runner_->Build());
