Skip to content

Commit 726d6fb

Browse files
committed
Add a RuntimeOption to set inter and intra op threadpool sizes
1 parent 8b6fa81 commit 726d6fb

File tree

3 files changed

+53
-1
lines changed

3 files changed

+53
-1
lines changed

source/neuropod/backends/tensorflow/tf_backend.cc

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ void check_tf_status(const tensorflow::Status &status)
 }
 
 // Get TF session options given Neuropod RuntimeOptions
-tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
+tensorflow::SessionOptions get_tf_opts(const RuntimeOptions &runtime_opts)
 {
     tensorflow::SessionOptions opts;
 
@@ -103,6 +103,26 @@ tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
     opts.config.set_allow_soft_placement(true);
     opts.config.set_log_device_placement(false);
 
+    // Set intra and inter op parallelism
+    // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
+    if (runtime_opts.experimental_intra_op_parallelism_threads != 0)
+    {
+        opts.config.set_intra_op_parallelism_threads(
+            static_cast<int32_t>(runtime_opts.experimental_intra_op_parallelism_threads));
+    }
+
+    if (runtime_opts.experimental_inter_op_parallelism_threads == 1)
+    {
+        // Only use the caller thread
+        opts.config.set_inter_op_parallelism_threads(-1);
+    }
+    else if (runtime_opts.experimental_inter_op_parallelism_threads > 1)
+    {
+        // The number in runtime_opts includes the caller thread
+        opts.config.set_inter_op_parallelism_threads(
+            static_cast<int32_t>(runtime_opts.experimental_inter_op_parallelism_threads) - 1);
+    }
+
     // Note: we can't use GPUOptions::visible_device_list as it is a per process setting
     //
     // From: https://github.com/tensorflow/tensorflow/issues/18861#issuecomment-385610497

source/neuropod/backends/torchscript/torch_backend.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,21 @@ std::mutex loaded_op_mutex;
 TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, const RuntimeOptions &options)
     : NeuropodBackendWithDefaultAllocator<TorchNeuropodTensor>(neuropod_path, options)
 {
+    // inter and intra op parallelism settings only supported in Torch >= 1.2.0
+#if CAFFE2_NIGHTLY_VERSION >= 20190808
+    // Set intra and inter op parallelism
+    // See https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
+    if (options.experimental_inter_op_parallelism_threads != 0)
+    {
+        at::set_num_interop_threads(static_cast<int32_t>(options.experimental_inter_op_parallelism_threads));
+    }
+
+    if (options.experimental_intra_op_parallelism_threads != 0)
+    {
+        at::set_num_threads(static_cast<int32_t>(options.experimental_intra_op_parallelism_threads));
+    }
+#endif
+
     if (options.load_model_at_construction)
     {
         load_model();

source/neuropod/options.hh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,23 @@ struct RuntimeOptions
 
     // Whether or not to disable shape and type checking when running inference
     bool disable_shape_and_type_checking = false;
+
+    // EXPERIMENTAL
+    // Set the intra and inter op parallelism for the underlying framework
+    // Within a given process, only the first usage of the below configuration is used
+    // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
+    // and https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
+    // for more details
+    // For true per-model control of these values, use out-of-process execution (see above)
+    // A value of 0 means system defined
+    // Note: for TorchScript, requires at least Torch 1.2.0
+    uint32_t experimental_intra_op_parallelism_threads = 0;
+
+    // EXPERIMENTAL
+    // A value of 0 means system defined
+    // Note: this count includes the caller thread
+    // Note: for TorchScript, requires at least Torch 1.2.0
+    uint32_t experimental_inter_op_parallelism_threads = 0;
 };
 
 } // namespace neuropod

0 commit comments

Comments (0)