
set cpu affinity and membind for better oob performance #853

Merged: 19 commits, Aug 27, 2024
7 changes: 5 additions & 2 deletions docker/Dockerfile.intel
@@ -27,6 +27,8 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
libpng-dev \
python3 \
python3-pip \
python3-dev \
libnuma-dev \
&& rm -rf /var/lib/apt/lists/*"
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
@@ -43,12 +45,13 @@ RUN python3 -m pip install --no-cache-dir \
torchaudio==${TORCHAUDIO_VERSION} \
-f https://download.pytorch.org/whl/torch_stable.html && \
python3 -m pip install intel-extension-for-pytorch==$IPEX_VERSION && \
-    python3 -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
+    python3 -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ && \
+    python3 -m pip install --no-cache-dir numa

ARG OMP_NUM_THREADS=1
ENV OMP_NUM_THREADS=${OMP_NUM_THREADS}
ARG KMP_BLOCKTIME=1
ENV KMP_BLOCKTIME=${KMP_BLOCKTIME}
ARG KMP_HW_SUBSET=1T
ENV KMP_HW_SUBSET=${KMP_HW_SUBSET}
-ENV LD_PRELOAD="/usr/local/lib/libiomp5.so /usr/lib/x86_64-linux-gnu/libtcmalloc.so"
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc.so"
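For reference, a process inside this image would pick up the threading knobs exported above roughly like this (a minimal sketch; the variable names are mine, not from the PR):

```python
import os

# Defaults mirror the ARG values baked into the Dockerfile.
omp_num_threads = int(os.environ.get("OMP_NUM_THREADS", "1"))
kmp_blocktime = int(os.environ.get("KMP_BLOCKTIME", "1"))
kmp_hw_subset = os.environ.get("KMP_HW_SUBSET", "1T")  # e.g. "1T" = one thread per core
```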
28 changes: 27 additions & 1 deletion optimum/intel/ipex/modeling_base.py
@@ -60,7 +60,6 @@
from ..utils.import_utils import is_ipex_version, is_torch_version, is_transformers_version
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS, recursive_to_device


logger = logging.getLogger(__name__)


@@ -129,6 +128,21 @@ def ipex_jit_trace(model, task, use_cache):

return trace_model

def get_int_from_env(env_keys, default):
    """Returns the first non-negative env value found in the `env_keys` list, or the default."""
for e in env_keys:
val = int(os.environ.get(e, -1))
if val >= 0:
return val
return default

def get_number_of_sockets():
sockets = set()
with open('/proc/cpuinfo') as f:
for line in f:
if line.startswith('physical id'):
sockets.add(line.strip().split()[-1])
return len(sockets)
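Since get_number_of_sockets reads /proc/cpuinfo directly, the parsing is awkward to exercise off a Linux box; the same logic over a canned excerpt looks like this (an illustrative sketch, not part of the diff):

```python
def count_sockets(cpuinfo_text):
    # Count distinct "physical id" values, exactly as get_number_of_sockets does.
    sockets = set()
    for line in cpuinfo_text.splitlines():
        if line.startswith("physical id"):
            sockets.add(line.strip().split()[-1])
    return len(sockets)

sample = (
    "processor\t: 0\nphysical id\t: 0\n"
    "processor\t: 1\nphysical id\t: 0\n"
    "processor\t: 2\nphysical id\t: 1\n"
)
print(count_sockets(sample))  # two distinct physical ids -> 2
```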

class IPEXModel(OptimizedModel):
auto_model_class = AutoModel
@@ -153,6 +167,18 @@ def __init__(
else:
self._device = torch.device("cpu")

import numa
Collaborator: should add a check that the numa package is available.

import psutil

n_sockets = get_number_of_sockets()
num_cpu_threads_per_process = int(psutil.cpu_count(logical=False) / n_sockets)
os.environ["OMP_NUM_THREADS"] = str(num_cpu_threads_per_process)
torch.set_num_threads(num_cpu_threads_per_process)
numa.set_affinity(0, range(num_cpu_threads_per_process))
numa.set_membind([0])
print("affinity", numa.get_affinity(0))
print("membind", numa.get_membind())
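To illustrate what this binding achieves: with one process per socket, each rank would be pinned to a contiguous range of physical cores, along these lines (a sketch; the rank-to-socket mapping here is my assumption, not the PR's code):

```python
def cores_for_rank(rank, cores_per_socket):
    # Assume rank i owns socket i, so its cores start at rank * cores_per_socket.
    start = rank * cores_per_socket
    return list(range(start, start + cores_per_socket))

print(cores_for_rank(0, 4))  # [0, 1, 2, 3]
print(cores_for_rank(1, 4))  # [4, 5, 6, 7]
```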

@sywangyi (Collaborator), Jul 30, 2024: Also, if OMP_NUM_THREADS or the membind is already set by an external user, we should not override the user's configuration.

The Tensor Parallel case should be considered as well.
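Both review points could be addressed along these lines (a hedged sketch, not the PR's final code; importlib.util.find_spec is the standard way to probe for an optional package):

```python
import importlib.util
import os

# Review point 1: only touch NUMA APIs if the optional numa package is present.
numa_available = importlib.util.find_spec("numa") is not None

def pick_thread_count(physical_cores, n_sockets):
    # Review point 2: respect a user-supplied OMP_NUM_THREADS instead of overriding it.
    user_value = os.environ.get("OMP_NUM_THREADS")
    if user_value is not None:
        return int(user_value)
    return physical_cores // n_sockets
```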


        # CPU only supports the JIT model for now.
if export:
if isinstance(model, torch.jit.RecursiveScriptModule):