Skip to content

Commit

Permalink
Merge pull request #567 from kirilg/r1.3
Browse files Browse the repository at this point in the history
TensorFlow Serving r1.3
  • Loading branch information
kirilg authored Aug 18, 2017
2 parents 1a88c25 + 2c5b4e1 commit 267d682
Show file tree
Hide file tree
Showing 28 changed files with 490 additions and 99 deletions.
2 changes: 1 addition & 1 deletion tensorflow
Submodule tensorflow updated 777 files
31 changes: 0 additions & 31 deletions tensorflow_serving/apis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ filegroup(
load("//tensorflow_serving:serving.bzl", "serving_proto_library")
load("//tensorflow_serving:serving.bzl", "serving_proto_library_py")
load("//tensorflow_serving:serving.bzl", "serving_go_grpc_library")
load("@org_tensorflow//tensorflow/core:platform/default/build_config.bzl", "tf_pyclif_proto_library")

serving_proto_library(
name = "get_model_metadata_proto",
Expand Down Expand Up @@ -67,12 +66,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "input_pyclif",
proto_lib = ":input_proto",
proto_srcfile = "input.proto",
)

serving_proto_library(
name = "model_proto",
srcs = ["model.proto"],
Expand All @@ -91,12 +84,6 @@ serving_proto_library_py(
deps = [],
)

tf_pyclif_proto_library(
name = "model_pyclif",
proto_lib = ":model_proto",
proto_srcfile = "model.proto",
)

serving_proto_library(
name = "predict_proto",
srcs = ["predict.proto"],
Expand Down Expand Up @@ -178,12 +165,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "classification_pyclif",
proto_lib = ":classification_proto",
proto_srcfile = "classification.proto",
)

serving_proto_library(
name = "inference_proto",
srcs = ["inference.proto"],
Expand All @@ -210,12 +191,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "inference_pyclif",
proto_lib = ":inference_proto",
proto_srcfile = "inference.proto",
)

serving_proto_library(
name = "regression_proto",
srcs = ["regression.proto"],
Expand All @@ -239,12 +214,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "regression_pyclif",
proto_lib = ":regression_proto",
proto_srcfile = "regression.proto",
)

cc_library(
name = "classifier",
hdrs = ["classifier.h"],
Expand Down
6 changes: 1 addition & 5 deletions tensorflow_serving/config/model_server_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ message ModelConfig {
// (This cannot be changed once a model is in serving.)
string model_platform = 4;

// DEPRECATED: This field is deprecated. For now it's still obeyed as long as
// 'model_version_policy' is not set. If 'model_version_policy' is set, then
// the value of this field is ignored.
FileSystemStoragePathSourceConfig.VersionPolicy version_policy = 5
[deprecated = true];
reserved 5;

// Version policy for the model indicating how many versions of the model to
// be served at the same time.
Expand Down
1 change: 1 addition & 0 deletions tensorflow_serving/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ cc_library(
deps = [
":loader",
":source_adapter",
"//tensorflow_serving/resources:resource_util",
"//tensorflow_serving/resources:resource_values",
"//tensorflow_serving/util:any_ptr",
"//tensorflow_serving/util:optional",
Expand Down
5 changes: 4 additions & 1 deletion tensorflow_serving/core/loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ class Loader {
/// the estimate must specify the instance to which each resource is
/// bound.
/// 4. The estimate must be monotonically non-increasing, i.e. it cannot
/// increase over time.
/// increase over time. Reasons to have it potentially decrease over time
/// include: (a) replace conservative estimate with actual measurement
/// once loaded in memory; (b) load process consumes extra transient
/// memory that is not used in steady-state after the load completes.
///
/// @return an estimate of the resources the servable will consume once
/// loaded. If the servable has already been loaded, returns an estimate of
Expand Down
92 changes: 80 additions & 12 deletions tensorflow_serving/core/simple_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow_serving/core/loader.h"
#include "tensorflow_serving/core/source_adapter.h"
#include "tensorflow_serving/resources/resource_util.h"
#include "tensorflow_serving/resources/resource_values.h"
#include "tensorflow_serving/util/any_ptr.h"
#include "tensorflow_serving/util/optional.h"
Expand Down Expand Up @@ -62,6 +63,9 @@ namespace serving {
// };
// std::unique_ptr<Loader> loader(new SimpleLoader<time_t>(
// servable_creator, resource_estimator));
//
// This class is not thread-safe. Synchronization is assumed to be done by the
// caller.
template <typename ServableType>
class SimpleLoader : public Loader {
public:
Expand All @@ -80,7 +84,19 @@ class SimpleLoader : public Loader {
// and hence the serving system cannot enforce resource safety.
static ResourceEstimator EstimateNoResources();

// Constructor that takes a single resource estimator, to use for estimating
// the resources needed during load as well as post-load.
SimpleLoader(Creator creator, ResourceEstimator resource_estimator);

// Constructor that takes two resource estimators: one to use for estimating
// the resources needed during load, as well as a second one that gives a
// different estimate after loading has finished. See the documentation on
// Loader::EstimateResources() for (a) potential reasons the estimate might
// decrease, and (b) correctness constraints on how the estimate is allowed to
// change over time.
SimpleLoader(Creator creator, ResourceEstimator resource_estimator,
ResourceEstimator post_load_resource_estimator);

~SimpleLoader() override = default;

Status EstimateResources(ResourceAllocation* estimate) const override;
Expand All @@ -94,11 +110,20 @@ class SimpleLoader : public Loader {
private:
Creator creator_;

// A function that estimates the resources needed to load the servable.
ResourceEstimator resource_estimator_;

// The memoized estimated resource requirement of the session bundle servable.
// An optional function that estimates the resources needed for the servable
// after it has been loaded. (If omitted, 'resource_estimator_' should be used
// for all estimates, i.e. before, during and after load.)
optional<ResourceEstimator> post_load_resource_estimator_;

// The memoized estimated resource requirement of the servable.
mutable optional<ResourceAllocation> memoized_resource_estimate_;

std::unique_ptr<ResourceUtil> resource_util_;
Resource ram_resource_;

std::unique_ptr<ServableType> servable_;

TF_DISALLOW_COPY_AND_ASSIGN(SimpleLoader);
Expand Down Expand Up @@ -180,7 +205,23 @@ SimpleLoader<ServableType>::EstimateNoResources() {
template <typename ServableType>
SimpleLoader<ServableType>::SimpleLoader(Creator creator,
ResourceEstimator resource_estimator)
: creator_(creator), resource_estimator_(resource_estimator) {}
: creator_(creator), resource_estimator_(resource_estimator) {
ResourceUtil::Options resource_util_options;
resource_util_options.devices = {{device_types::kMain, 1}};
resource_util_ =
std::unique_ptr<ResourceUtil>(new ResourceUtil(resource_util_options));

ram_resource_ = resource_util_->CreateBoundResource(
device_types::kMain, resource_kinds::kRamBytes);
}

template <typename ServableType>
SimpleLoader<ServableType>::SimpleLoader(
Creator creator, ResourceEstimator resource_estimator,
ResourceEstimator post_load_resource_estimator)
: SimpleLoader(creator, resource_estimator) {
post_load_resource_estimator_ = post_load_resource_estimator;
}

template <typename ServableType>
Status SimpleLoader<ServableType>::EstimateResources(
Expand All @@ -198,8 +239,36 @@ Status SimpleLoader<ServableType>::EstimateResources(

template <typename ServableType>
Status SimpleLoader<ServableType>::Load() {
const Status status = creator_(&servable_);
return status;
TF_RETURN_IF_ERROR(creator_(&servable_));

if (post_load_resource_estimator_) {
// Save the during-load estimate (may be able to use the memoized value).
ResourceAllocation during_load_resource_estimate;
TF_RETURN_IF_ERROR(EstimateResources(&during_load_resource_estimate));

// Obtain the post-load estimate, and store it as the memoized value.
ResourceAllocation post_load_resource_estimate;
TF_RETURN_IF_ERROR(
(*post_load_resource_estimator_)(&post_load_resource_estimate));
memoized_resource_estimate_ = post_load_resource_estimate;

// Release any transient memory used only during load to the OS.
const uint64 during_load_ram_estimate = resource_util_->GetQuantity(
ram_resource_, during_load_resource_estimate);
const uint64 post_load_ram_estimate =
resource_util_->GetQuantity(ram_resource_, post_load_resource_estimate);
if (post_load_ram_estimate < during_load_ram_estimate) {
const uint64 transient_ram_estimate =
during_load_ram_estimate - post_load_ram_estimate;
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
"load with "
<< transient_ram_estimate;
::tensorflow::port::MallocExtension_ReleaseToSystem(
transient_ram_estimate);
}
}

return Status::OK();
}

template <typename ServableType>
Expand All @@ -219,14 +288,13 @@ void SimpleLoader<ServableType>::Unload() {

// If we have a main-memory footprint estimate, release that amount of memory
// to the OS.
for (const ResourceAllocation::Entry& entry :
resource_estimate.resource_quantities()) {
if (entry.resource().device() == device_types::kMain &&
entry.resource().kind() == resource_kinds::kRamBytes) {
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() with "
<< entry.quantity();
::tensorflow::port::MallocExtension_ReleaseToSystem(entry.quantity());
}
const uint64 memory_estimate =
resource_util_->GetQuantity(ram_resource_, resource_estimate);
if (memory_estimate > 0) {
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
"unload with "
<< memory_estimate;
::tensorflow::port::MallocExtension_ReleaseToSystem(memory_estimate);
}
}

Expand Down
58 changes: 57 additions & 1 deletion tensorflow_serving/core/simple_loader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,69 @@ TEST(SimpleLoaderTest, ResourceEstimation) {
*estimate = want;
return Status::OK();
}));
for (int i = 0; i < 2; ++i) {

{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(want));
}

// The estimate should remain the same after load.
TF_ASSERT_OK(loader->Load());
{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(want));
}
}

TEST(SimpleLoaderTest, ResourceEstimationWithPostLoadRelease) {
const auto pre_load_resources = CreateProto<ResourceAllocation>(
"resource_quantities { "
" resource { "
" device: 'main' "
" kind: 'processing' "
" } "
" quantity: 42 "
"} ");
const auto post_load_resources = CreateProto<ResourceAllocation>(
"resource_quantities { "
" resource { "
" device: 'main' "
" kind: 'processing' "
" } "
" quantity: 17 "
"} ");
std::unique_ptr<Loader> loader(new SimpleLoader<int>(
[](std::unique_ptr<int>* servable) {
servable->reset(new int);
return Status::OK();
},
[&pre_load_resources](ResourceAllocation* estimate) {
*estimate = pre_load_resources;
return Status::OK();
},
[&post_load_resources](ResourceAllocation* estimate) {
*estimate = post_load_resources;
return Status::OK();
}));

// Run it twice, to exercise memoization.
for (int i = 0; i < 2; ++i) {
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(pre_load_resources));
}

// The estimate should switch to the post-load one after load.
TF_ASSERT_OK(loader->Load());
{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(post_load_resources));
}
}

// Verify that the error returned by the Creator propagates back through
// Load.
TEST(SimpleLoaderTest, LoadError) {
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/METADATA
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: "TensorFlow Serving"
g3doc: {
include: "/learning/serving/g3doc/METADATA"
sitemap_file: "/learning/serving/g3doc/users/sitemap.md"
sitemap_file: "/learning/serving/g3doc/sitemap.md"
}

2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ in the documentation, you can add the flags `-c opt --copt=-msse4.1
subset of these flags). For example:

```shell
bazel build -c opt --config=mkl --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
bazel build -c opt --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
```

Note: These instruction sets are not available on all machines, especially with
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/signature_defs.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ constants. Specifically:
C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h).

In addition, SavedModel provides a
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/utils.py)
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_def_utils.py)
to help build a signature-def.

## Sample structures
Expand Down
Loading

0 comments on commit 267d682

Please sign in to comment.