From 8b6fa811ea7c154ac1beec800968f00a592752df Mon Sep 17 00:00:00 2001 From: Vivek Panyam Date: Wed, 24 Mar 2021 19:36:14 -0400 Subject: [PATCH 1/2] Update pip bootstrap URL --- build/install_python_deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/install_python_deps.sh b/build/install_python_deps.sh index 05103eea..8ce239fe 100755 --- a/build/install_python_deps.sh +++ b/build/install_python_deps.sh @@ -5,7 +5,7 @@ set -e NEUROPOD_PYTHON_BINARY="python${NEUROPOD_PYTHON_VERSION}" # Install pip -wget https://bootstrap.pypa.io/2.7/get-pip.py -O /tmp/get-pip.py +wget https://bootstrap.pypa.io/pip/2.7/get-pip.py -O /tmp/get-pip.py ${NEUROPOD_PYTHON_BINARY} /tmp/get-pip.py # Setup a virtualenv From 726d6fb00b52f02c44dc877e8a4f412b888cced3 Mon Sep 17 00:00:00 2001 From: Vivek Panyam Date: Fri, 7 Aug 2020 16:46:17 -0700 Subject: [PATCH 2/2] Add a RuntimeOption to set inter and intra op threadpool sizes --- .../backends/tensorflow/tf_backend.cc | 22 ++++++++++++++++++- .../backends/torchscript/torch_backend.cc | 15 +++++++++++++ source/neuropod/options.hh | 17 ++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/source/neuropod/backends/tensorflow/tf_backend.cc b/source/neuropod/backends/tensorflow/tf_backend.cc index c30ad67d..c1a54450 100644 --- a/source/neuropod/backends/tensorflow/tf_backend.cc +++ b/source/neuropod/backends/tensorflow/tf_backend.cc @@ -93,7 +93,7 @@ void check_tf_status(const tensorflow::Status &status) } // Get TF session options given Neuropod RuntimeOptions -tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/) +tensorflow::SessionOptions get_tf_opts(const RuntimeOptions &runtime_opts) { tensorflow::SessionOptions opts; @@ -103,6 +103,26 @@ tensorflow::SessionOptions get_tf_opts(const RuntimeOptions & /*unused*/) opts.config.set_allow_soft_placement(true); opts.config.set_log_device_placement(false); + // Set intra and inter op parallelism + // See 
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto + if (runtime_opts.experimental_intra_op_parallelism_threads != 0) + { + opts.config.set_intra_op_parallelism_threads( + static_cast<int32_t>(runtime_opts.experimental_intra_op_parallelism_threads)); + } + + if (runtime_opts.experimental_inter_op_parallelism_threads == 1) + { + // Only use the caller thread + opts.config.set_inter_op_parallelism_threads(-1); + } + else if (runtime_opts.experimental_inter_op_parallelism_threads > 1) + { + // The number in runtime_opts includes the caller thread + opts.config.set_inter_op_parallelism_threads( + static_cast<int32_t>(runtime_opts.experimental_inter_op_parallelism_threads) - 1); + } + // Note: we can't use GPUOptions::visible_device_list as it is a per process setting // // From: https://github.com/tensorflow/tensorflow/issues/18861#issuecomment-385610497 diff --git a/source/neuropod/backends/torchscript/torch_backend.cc b/source/neuropod/backends/torchscript/torch_backend.cc index 9c845893..88faebf5 100644 --- a/source/neuropod/backends/torchscript/torch_backend.cc +++ b/source/neuropod/backends/torchscript/torch_backend.cc @@ -225,6 +225,21 @@ std::mutex loaded_op_mutex; TorchNeuropodBackend::TorchNeuropodBackend(const std::string &neuropod_path, const RuntimeOptions &options) : NeuropodBackendWithDefaultAllocator<TorchNeuropodTensor>(neuropod_path, options) { +// inter and intra op parallelism settings only supported in Torch >= 1.2.0 +#if CAFFE2_NIGHTLY_VERSION >= 20190808 + // Set intra and inter op parallelism + // See https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api + if (options.experimental_inter_op_parallelism_threads != 0) + { + at::set_num_interop_threads(static_cast<int32_t>(options.experimental_inter_op_parallelism_threads)); + } + + if (options.experimental_intra_op_parallelism_threads != 0) + { + at::set_num_threads(static_cast<int32_t>(options.experimental_intra_op_parallelism_threads)); + } +#endif + if 
(options.load_model_at_construction) { load_model(); diff --git a/source/neuropod/options.hh b/source/neuropod/options.hh index 319bda07..dcc529c8 100644 --- a/source/neuropod/options.hh +++ b/source/neuropod/options.hh @@ -75,6 +75,23 @@ struct RuntimeOptions // Whether or not to disable shape and type checking when running inference bool disable_shape_and_type_checking = false; + + // EXPERIMENTAL + // Set the intra and inter op parallelism for the underlying framework + // Within a given process, only the first usage of the below configuration is used + // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto + // and https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#runtime-api + // for more details + // For true per-model control of these values, use out-of-process execution (see above) + // A value of 0 means system defined + // Note: for TorchScript, requires at least Torch 1.2.0 + uint32_t experimental_intra_op_parallelism_threads = 0; + + // EXPERIMENTAL + // A value of 0 means system defined + // Note: this count includes the caller thread + // Note: for TorchScript, requires at least Torch 1.2.0 + uint32_t experimental_inter_op_parallelism_threads = 0; }; } // namespace neuropod