3 files changed: +43 −1
@@ -93,7 +93,7 @@ void check_tf_status(const tensorflow::Status &status)
 }
 
 // Get TF session options given Neuropod RuntimeOptions
-tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
+tensorflow::SessionOptions get_tf_opts(const RuntimeOptions &runtime_opts)
 {
     tensorflow::SessionOptions opts;
 
@@ -103,6 +103,24 @@ tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/)
     opts.config.set_allow_soft_placement(true);
     opts.config.set_log_device_placement(false);
 
+    // Set intra and inter op parallelism
+    // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
+    if (runtime_opts.intra_op_parallelism_threads != 0)
+    {
+        opts.config.set_intra_op_parallelism_threads(runtime_opts.intra_op_parallelism_threads);
+    }
+
+    if (runtime_opts.inter_op_parallelism_threads == 1)
+    {
+        // Only use the caller thread
+        opts.config.set_inter_op_parallelism_threads(-1);
+    }
+    else if (runtime_opts.inter_op_parallelism_threads > 1)
+    {
+        // The number in runtime_opts includes the caller thread
+        opts.config.set_inter_op_parallelism_threads(runtime_opts.inter_op_parallelism_threads - 1);
+    }
+
     // Note: we can't use GPUOptions::visible_device_list as it is a per process setting
     //
     // From: https://github.com/tensorflow/tensorflow/issues/18861#issuecomment-385610497
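To make the caller-thread arithmetic in the hunk above concrete, here is a minimal standalone sketch of the same mapping (tf_inter_op_setting is an illustrative name, not part of the backend):

#include <cassert>
#include <cstdint>

// Mirrors the logic above: returns the value the backend would pass to
// TF's set_inter_op_parallelism_threads, or 0 to leave the TF default in place.
int32_t tf_inter_op_setting(uint32_t requested)
{
    if (requested == 1)
    {
        return -1; // only use the caller thread
    }
    if (requested > 1)
    {
        // The requested count includes the caller thread, so the pool gets one fewer
        return static_cast<int32_t>(requested) - 1;
    }
    return 0; // 0 = system defined
}

int main()
{
    assert(tf_inter_op_setting(0) == 0);  // leave TF's default
    assert(tf_inter_op_setting(1) == -1); // run inter-op work on the caller thread
    assert(tf_inter_op_setting(4) == 3);  // 3 pool threads + the caller thread
    return 0;
}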
@@ -225,6 +225,18 @@ std::mutex loaded_op_mutex;
 TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, const RuntimeOptions &options)
     : NeuropodBackendWithDefaultAllocator<TorchNeuropodTensor>(neuropod_path, options)
 {
+    // Set intra and inter op parallelism
+    // See https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
+    if (options.inter_op_parallelism_threads != 0)
+    {
+        at::set_num_interop_threads(options.inter_op_parallelism_threads);
+    }
+
+    if (options.intra_op_parallelism_threads != 0)
+    {
+        at::set_num_threads(options.intra_op_parallelism_threads);
+    }
+
     if (options.load_model_at_construction)
     {
         load_model();
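For comparison, the same settings applied directly through libtorch look like the sketch below (thread counts are illustrative). Note that libtorch allows at::set_num_interop_threads to be called only once, before any inter-op parallel work starts, which is why the backend applies it in the constructor:

#include <ATen/Parallel.h>

void configure_torch_threading()
{
    // Size of the inter-op thread pool; must be set before any parallel work runs
    at::set_num_interop_threads(2);

    // Number of threads used within a single op (intra-op parallelism)
    at::set_num_threads(4);
}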
@@ -75,6 +75,18 @@ struct RuntimeOptions
 
     // Whether or not to disable shape and type checking when running inference
     bool disable_shape_and_type_checking = false;
+
+    // Set the intra and inter op parallelism for the underlying framework
+    // Within a given process, only the first usage of the configuration below takes effect
+    // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
+    // and https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api
+    // for more details
+    // A value of 0 means system defined
+    uint32_t intra_op_parallelism_threads = 0;
+
+    // A value of 0 means system defined
+    // Note: this count includes the caller thread
+    uint32_t inter_op_parallelism_threads = 0;
 };
 
 } // namespace neuropod
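Putting the new options together, a minimal usage sketch, assuming the public neuropod::Neuropod constructor that takes a RuntimeOptions (the model path is illustrative):

#include "neuropod/neuropod.hh"

int main()
{
    neuropod::RuntimeOptions opts;
    opts.intra_op_parallelism_threads = 4; // threads within a single op; 0 keeps the framework default
    opts.inter_op_parallelism_threads = 2; // ops run in parallel, including the caller thread

    // As noted above, only the first model loaded in a process applies these settings
    neuropod::Neuropod model("/path/to/model.neuropod", opts);
    return 0;
}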