Skip to content

Commit

Permalink
Merge pull request #567 from kirilg/r1.3
Browse files Browse the repository at this point in the history
TensorFlow Serving r1.3
  • Loading branch information
kirilg authored Aug 18, 2017
2 parents 1a88c25 + 2c5b4e1 commit 267d682
Show file tree
Hide file tree
Showing 28 changed files with 490 additions and 99 deletions.
2 changes: 1 addition & 1 deletion tensorflow
Submodule tensorflow updated 777 files
31 changes: 0 additions & 31 deletions tensorflow_serving/apis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ filegroup(
load("//tensorflow_serving:serving.bzl", "serving_proto_library")
load("//tensorflow_serving:serving.bzl", "serving_proto_library_py")
load("//tensorflow_serving:serving.bzl", "serving_go_grpc_library")
load("@org_tensorflow//tensorflow/core:platform/default/build_config.bzl", "tf_pyclif_proto_library")

serving_proto_library(
name = "get_model_metadata_proto",
Expand Down Expand Up @@ -67,12 +66,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "input_pyclif",
proto_lib = ":input_proto",
proto_srcfile = "input.proto",
)

serving_proto_library(
name = "model_proto",
srcs = ["model.proto"],
Expand All @@ -91,12 +84,6 @@ serving_proto_library_py(
deps = [],
)

tf_pyclif_proto_library(
name = "model_pyclif",
proto_lib = ":model_proto",
proto_srcfile = "model.proto",
)

serving_proto_library(
name = "predict_proto",
srcs = ["predict.proto"],
Expand Down Expand Up @@ -178,12 +165,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "classification_pyclif",
proto_lib = ":classification_proto",
proto_srcfile = "classification.proto",
)

serving_proto_library(
name = "inference_proto",
srcs = ["inference.proto"],
Expand All @@ -210,12 +191,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "inference_pyclif",
proto_lib = ":inference_proto",
proto_srcfile = "inference.proto",
)

serving_proto_library(
name = "regression_proto",
srcs = ["regression.proto"],
Expand All @@ -239,12 +214,6 @@ serving_proto_library_py(
],
)

tf_pyclif_proto_library(
name = "regression_pyclif",
proto_lib = ":regression_proto",
proto_srcfile = "regression.proto",
)

cc_library(
name = "classifier",
hdrs = ["classifier.h"],
Expand Down
6 changes: 1 addition & 5 deletions tensorflow_serving/config/model_server_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ message ModelConfig {
// (This cannot be changed once a model is in serving.)
string model_platform = 4;

// DEPRECATED: This field is deprecated. For now it's still obeyed as long as
// 'model_version_policy' is not set. If 'model_version_policy' is set, then
// the value of this field is ignored.
FileSystemStoragePathSourceConfig.VersionPolicy version_policy = 5
[deprecated = true];
reserved 5;

// Version policy for the model indicating how many versions of the model to
// be served at the same time.
Expand Down
1 change: 1 addition & 0 deletions tensorflow_serving/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ cc_library(
deps = [
":loader",
":source_adapter",
"//tensorflow_serving/resources:resource_util",
"//tensorflow_serving/resources:resource_values",
"//tensorflow_serving/util:any_ptr",
"//tensorflow_serving/util:optional",
Expand Down
5 changes: 4 additions & 1 deletion tensorflow_serving/core/loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ class Loader {
/// the estimate must specify the instance to which each resource is
/// bound.
/// 4. The estimate must be monotonically non-increasing, i.e. it cannot
/// increase over time.
/// increase over time. Reasons to have it potentially decrease over time
/// include: (a) replace conservative estimate with actual measurement
/// once loaded in memory; (b) load process consumes extra transient
/// memory that is not used in steady-state after the load completes.
///
/// @return an estimate of the resources the servable will consume once
/// loaded. If the servable has already been loaded, returns an estimate of
Expand Down
92 changes: 80 additions & 12 deletions tensorflow_serving/core/simple_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
#include "tensorflow_serving/core/loader.h"
#include "tensorflow_serving/core/source_adapter.h"
#include "tensorflow_serving/resources/resource_util.h"
#include "tensorflow_serving/resources/resource_values.h"
#include "tensorflow_serving/util/any_ptr.h"
#include "tensorflow_serving/util/optional.h"
Expand Down Expand Up @@ -62,6 +63,9 @@ namespace serving {
// };
// std::unique_ptr<Loader> loader(new SimpleLoader<time_t>(
// servable_creator, resource_estimator));
//
// This class is not thread-safe. Synchronization is assumed to be done by the
// caller.
template <typename ServableType>
class SimpleLoader : public Loader {
public:
Expand All @@ -80,7 +84,19 @@ class SimpleLoader : public Loader {
// and hence the serving system cannot enforce resource safety.
static ResourceEstimator EstimateNoResources();

// Constructor that takes a single resource estimator, to use for estimating
// the resources needed during load as well as post-load.
SimpleLoader(Creator creator, ResourceEstimator resource_estimator);

// Constructor that takes two resource estimators: one to use for estimating
// the resources needed during load, as well as a second one that gives a
// different estimate after loading has finished. See the documentation on
// Loader::EstimateResources() for (a) potential reasons the estimate might
// decrease, and (b) correctness constraints on how the estimate is allowed to
// change over time.
SimpleLoader(Creator creator, ResourceEstimator resource_estimator,
ResourceEstimator post_load_resource_estimator);

~SimpleLoader() override = default;

Status EstimateResources(ResourceAllocation* estimate) const override;
Expand All @@ -94,11 +110,20 @@ class SimpleLoader : public Loader {
private:
Creator creator_;

// A function that estimates the resources needed to load the servable.
ResourceEstimator resource_estimator_;

// The memoized estimated resource requirement of the session bundle servable.
// An optional function that estimates the resources needed for the servable
// after it has been loaded. (If omitted, 'resource_estimator_' should be used
// for all estimates, i.e. before, during and after load.)
optional<ResourceEstimator> post_load_resource_estimator_;

// The memoized estimated resource requirement of the servable.
mutable optional<ResourceAllocation> memoized_resource_estimate_;

std::unique_ptr<ResourceUtil> resource_util_;
Resource ram_resource_;

std::unique_ptr<ServableType> servable_;

TF_DISALLOW_COPY_AND_ASSIGN(SimpleLoader);
Expand Down Expand Up @@ -180,7 +205,23 @@ SimpleLoader<ServableType>::EstimateNoResources() {
template <typename ServableType>
SimpleLoader<ServableType>::SimpleLoader(Creator creator,
ResourceEstimator resource_estimator)
: creator_(creator), resource_estimator_(resource_estimator) {}
: creator_(creator), resource_estimator_(resource_estimator) {
ResourceUtil::Options resource_util_options;
resource_util_options.devices = {{device_types::kMain, 1}};
resource_util_ =
std::unique_ptr<ResourceUtil>(new ResourceUtil(resource_util_options));

ram_resource_ = resource_util_->CreateBoundResource(
device_types::kMain, resource_kinds::kRamBytes);
}

template <typename ServableType>
SimpleLoader<ServableType>::SimpleLoader(
Creator creator, ResourceEstimator resource_estimator,
ResourceEstimator post_load_resource_estimator)
: SimpleLoader(creator, resource_estimator) {
post_load_resource_estimator_ = post_load_resource_estimator;
}

template <typename ServableType>
Status SimpleLoader<ServableType>::EstimateResources(
Expand All @@ -198,8 +239,36 @@ Status SimpleLoader<ServableType>::EstimateResources(

template <typename ServableType>
Status SimpleLoader<ServableType>::Load() {
const Status status = creator_(&servable_);
return status;
TF_RETURN_IF_ERROR(creator_(&servable_));

if (post_load_resource_estimator_) {
// Save the during-load estimate (may be able to use the memoized value).
ResourceAllocation during_load_resource_estimate;
TF_RETURN_IF_ERROR(EstimateResources(&during_load_resource_estimate));

// Obtain the post-load estimate, and store it as the memoized value.
ResourceAllocation post_load_resource_estimate;
TF_RETURN_IF_ERROR(
(*post_load_resource_estimator_)(&post_load_resource_estimate));
memoized_resource_estimate_ = post_load_resource_estimate;

// Release any transient memory used only during load to the OS.
const uint64 during_load_ram_estimate = resource_util_->GetQuantity(
ram_resource_, during_load_resource_estimate);
const uint64 post_load_ram_estimate =
resource_util_->GetQuantity(ram_resource_, post_load_resource_estimate);
if (post_load_ram_estimate < during_load_ram_estimate) {
const uint64 transient_ram_estimate =
during_load_ram_estimate - post_load_ram_estimate;
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
"load with "
<< transient_ram_estimate;
::tensorflow::port::MallocExtension_ReleaseToSystem(
transient_ram_estimate);
}
}

return Status::OK();
}

template <typename ServableType>
Expand All @@ -219,14 +288,13 @@ void SimpleLoader<ServableType>::Unload() {

// If we have a main-memory footprint estimate, release that amount of memory
// to the OS.
for (const ResourceAllocation::Entry& entry :
resource_estimate.resource_quantities()) {
if (entry.resource().device() == device_types::kMain &&
entry.resource().kind() == resource_kinds::kRamBytes) {
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() with "
<< entry.quantity();
::tensorflow::port::MallocExtension_ReleaseToSystem(entry.quantity());
}
const uint64 memory_estimate =
resource_util_->GetQuantity(ram_resource_, resource_estimate);
if (memory_estimate > 0) {
LOG(INFO) << "Calling MallocExtension_ReleaseToSystem() after servable "
"unload with "
<< memory_estimate;
::tensorflow::port::MallocExtension_ReleaseToSystem(memory_estimate);
}
}

Expand Down
58 changes: 57 additions & 1 deletion tensorflow_serving/core/simple_loader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,69 @@ TEST(SimpleLoaderTest, ResourceEstimation) {
*estimate = want;
return Status::OK();
}));
for (int i = 0; i < 2; ++i) {

{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(want));
}

// The estimate should remain the same after load.
TF_ASSERT_OK(loader->Load());
{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(want));
}
}

TEST(SimpleLoaderTest, ResourceEstimationWithPostLoadRelease) {
const auto pre_load_resources = CreateProto<ResourceAllocation>(
"resource_quantities { "
" resource { "
" device: 'main' "
" kind: 'processing' "
" } "
" quantity: 42 "
"} ");
const auto post_load_resources = CreateProto<ResourceAllocation>(
"resource_quantities { "
" resource { "
" device: 'main' "
" kind: 'processing' "
" } "
" quantity: 17 "
"} ");
std::unique_ptr<Loader> loader(new SimpleLoader<int>(
[](std::unique_ptr<int>* servable) {
servable->reset(new int);
return Status::OK();
},
[&pre_load_resources](ResourceAllocation* estimate) {
*estimate = pre_load_resources;
return Status::OK();
},
[&post_load_resources](ResourceAllocation* estimate) {
*estimate = post_load_resources;
return Status::OK();
}));

// Run it twice, to exercise memoization.
for (int i = 0; i < 2; ++i) {
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(pre_load_resources));
}

// The estimate should switch to the post-load one after load.
TF_ASSERT_OK(loader->Load());
{
ResourceAllocation got;
TF_ASSERT_OK(loader->EstimateResources(&got));
EXPECT_THAT(got, EqualsProto(post_load_resources));
}
}

// Verify that the error returned by the Creator propagates back through
// Load.
TEST(SimpleLoaderTest, LoadError) {
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/METADATA
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: "TensorFlow Serving"
g3doc: {
include: "/learning/serving/g3doc/METADATA"
sitemap_file: "/learning/serving/g3doc/users/sitemap.md"
sitemap_file: "/learning/serving/g3doc/sitemap.md"
}

2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ in the documentation, you can add the flags `-c opt --copt=-msse4.1
subset of these flags). For example:

```shell
bazel build -c opt --config=mkl --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
bazel build -c opt --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-mavx2 --copt=-mfma --copt=-O3 tensorflow_serving/...
```

Note: These instruction sets are not available on all machines, especially with
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_serving/g3doc/signature_defs.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ constants. Specifically:
C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h).

In addition, SavedModel provides a
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/utils.py)
[util](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_def_utils.py)
to help build a signature-def.

## Sample structures
Expand Down
Loading

0 comments on commit 267d682

Please sign in to comment.