From 5cb9fe9dccc1796cc0273ad19106d938695a38bf Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil <serengil@gmail.com>
Date: Sat, 23 Dec 2023 12:46:15 +0000
Subject: [PATCH] load modules on python 3.11 and later

---
 README.md                        |  4 ++--
 chefboost/Chefboost.py           |  6 +++---
 chefboost/commons/functions.py   |  7 +++----
 chefboost/commons/module.py      | 29 +++++++++++++++++++++++++++++
 chefboost/training/Training.py   | 12 +++++-------
 chefboost/tuning/adaboost.py     | 12 +++++-------
 chefboost/tuning/gbm.py          | 17 +++++++----------
 chefboost/tuning/randomforest.py | 11 +++++------
 8 files changed, 59 insertions(+), 39 deletions(-)
 create mode 100644 chefboost/commons/module.py

diff --git a/README.md b/README.md
index fc2eef2..253c8d4 100644
--- a/README.md
+++ b/README.md
@@ -85,8 +85,8 @@ prediction = chef.predict(model, param = ['Sunny', 'Hot', 'High', 'Weak'])
 You can consume built decision trees directly as well. In this way, you can restore already built decision trees and skip learning steps, or apply [transfer learning](https://youtu.be/9hX8ir7_ZtA). Loaded trees offer you findDecision method to test for new instances.
 
 ```python
-moduleName = "outputs/rules/rules" #this will load outputs/rules/rules.py
-tree = chef.restoreTree(moduleName)
+module_name = "outputs/rules/rules" #this will load outputs/rules/rules.py
+tree = chef.restoreTree(module_name)
 prediction = tree.findDecision(['Sunny', 'Hot', 'High', 'Weak'])
 ```
 
diff --git a/chefboost/Chefboost.py b/chefboost/Chefboost.py
index f45c6ac..bd9ef1e 100644
--- a/chefboost/Chefboost.py
+++ b/chefboost/Chefboost.py
@@ -443,16 +443,16 @@ def load_model(file_name: str = "model.pkl") -> dict:
     return model
 
 
-def restoreTree(moduleName) -> dict:
+def restoreTree(module_name) -> Any:
     """
     Load built model from set of decision rules
     Args:
-        moduleName (str): e.g. outputs/rules/rules to restore outputs/rules/rules.py
+        module_name (str): e.g. outputs/rules/rules to restore outputs/rules/rules.py
     Returns:
-            built model (dict)
+            built model (module): loaded rules module exposing findDecision
     """
 
-    return functions.restoreTree(moduleName)
+    return functions.restoreTree(module_name)
 
 
 def feature_importance(rules: Union[str, list]) -> pd.DataFrame:
diff --git a/chefboost/commons/functions.py b/chefboost/commons/functions.py
index 3df4138..e2e029a 100644
--- a/chefboost/commons/functions.py
+++ b/chefboost/commons/functions.py
@@ -1,5 +1,4 @@
 import pathlib
-import imp  # pylint: disable=deprecated-module
 import os
 from os import path
 import multiprocessing
@@ -7,6 +6,7 @@
 import numpy as np
 from chefboost import Chefboost as cb
 from chefboost.commons.logger import Logger
+from chefboost.commons.module import load_module
 
 # pylint: disable=no-else-return, broad-except
 
@@ -23,9 +23,8 @@ def bulk_prediction(df, model):
     df["Prediction"] = predictions
 
 
-def restoreTree(moduleName):
-    fp, pathname, description = imp.find_module(moduleName)
-    return imp.load_module(moduleName, fp, pathname, description)
+def restoreTree(module_name):
+    return load_module(module_name)
 
 
 def softmax(w):
diff --git a/chefboost/commons/module.py b/chefboost/commons/module.py
new file mode 100644
index 0000000..ca95860
--- /dev/null
+++ b/chefboost/commons/module.py
@@ -0,0 +1,52 @@
+import sys
+from types import ModuleType
+
+# pylint: disable=no-else-return
+
+
+def load_module(module_name: str) -> ModuleType:
+    """
+    Load python module with its dotted name or its slash separated path
+    Args:
+        module_name (str): module name without .py extension. Path-like names
+            (e.g. outputs/rules/rules for outputs/rules/rules.py) are looked up
+            relative to the working directory first and then in sys.path
+    Returns:
+        module (ModuleType)
+    Raises:
+        ImportError: if the module cannot be located
+    """
+    if sys.version_info >= (3, 11):
+        import importlib.util
+        import os
+
+        spec = None
+        if "/" in module_name or os.sep in module_name:
+            # find_spec only understands dotted names, so resolve path-like names
+            # to a source file the way imp.find_module used to do
+            for entry in ["", *sys.path]:
+                if not isinstance(entry, str):
+                    continue
+                candidate = os.path.join(entry, f"{module_name}.py")
+                if os.path.isfile(candidate):
+                    spec = importlib.util.spec_from_file_location(module_name, candidate)
+                    break
+        else:
+            spec = importlib.util.find_spec(module_name)
+
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Module '{module_name}' not found")
+
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+        return module
+    else:
+        import imp  # pylint: disable=deprecated-module
+
+        fp, pathname, description = imp.find_module(module_name)
+        try:
+            return imp.load_module(module_name, fp, pathname, description)
+        finally:
+            # imp leaves closing the opened source file to the caller
+            if fp is not None:
+                fp.close()
diff --git a/chefboost/training/Training.py b/chefboost/training/Training.py
index 0aa9f1e..ee37ca7 100644
--- a/chefboost/training/Training.py
+++ b/chefboost/training/Training.py
@@ -1,5 +1,4 @@
 import math
-import imp  # pylint:disable=deprecated-module
 import uuid
 import json
 import copy
@@ -15,6 +14,7 @@
 from chefboost.training import Preprocess
 from chefboost.commons import functions
 from chefboost.commons.logger import Logger
+from chefboost.commons.module import load_module
 
 # pylint: disable=too-many-function-args, unused-argument
 
@@ -668,9 +668,8 @@ def buildDecisionTree(
         ):
             # this is reguler decision tree. find accuracy here.
 
-            moduleName = "outputs/rules/rules"
-            fp, pathname, description = imp.find_module(moduleName)
-            myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+            module_name = "outputs/rules/rules"
+            myrules = load_module(module_name) # rules0
             models.append(myrules)
 
     return models
@@ -682,9 +681,8 @@ def findPrediction(row):
     for j in range(0, num_of_features):
         params.append(row[j])
 
-    moduleName = "outputs/rules/rules"
-    fp, pathname, description = imp.find_module(moduleName)
-    myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+    module_name = "outputs/rules/rules"
+    myrules = load_module(module_name) # rules0
 
     prediction = myrules.findDecision(params)
     return prediction
diff --git a/chefboost/tuning/adaboost.py b/chefboost/tuning/adaboost.py
index 87b0dd3..cda488e 100644
--- a/chefboost/tuning/adaboost.py
+++ b/chefboost/tuning/adaboost.py
@@ -1,4 +1,3 @@
-import imp  # pylint: disable=deprecated-module
 import math
 
 import pandas as pd
@@ -8,6 +7,7 @@
 from chefboost.commons import functions
 from chefboost.training import Training
 from chefboost.commons.logger import Logger
+from chefboost.commons.module import load_module
 
 # pylint: disable=unused-argument
 
@@ -23,9 +23,8 @@ def findPrediction(row):
     for j in range(0, columns - 1):
         params.append(row[j])
 
-    moduleName = f"outputs/rules/rules_{int(epoch)}"
-    fp, pathname, description = imp.find_module(moduleName)
-    myrules = imp.load_module(moduleName, fp, pathname, description)
+    module_name = f"outputs/rules/rules_{int(epoch)}"
+    myrules = load_module(module_name)
 
     prediction = functions.sign(myrules.findDecision(params))
 
@@ -79,9 +78,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N
 
         # ---------------------------------------
 
-        moduleName = "outputs/rules/rules_" + str(i)
-        fp, pathname, description = imp.find_module(moduleName)
-        myrules = imp.load_module(moduleName, fp, pathname, description)
+        module_name = "outputs/rules/rules_" + str(i)
+        myrules = load_module(module_name)
         models.append(myrules)
 
         # ---------------------------------------
diff --git a/chefboost/tuning/gbm.py b/chefboost/tuning/gbm.py
index 53a6098..95bb17f 100644
--- a/chefboost/tuning/gbm.py
+++ b/chefboost/tuning/gbm.py
@@ -1,4 +1,3 @@
-import imp  # pylint: disable=deprecated-module
 import gc
 
 import pandas as pd
@@ -8,6 +7,7 @@
 from chefboost.commons import functions
 from chefboost.training import Training
 from chefboost.commons.logger import Logger
+from chefboost.commons.module import load_module
 
 # pylint: disable=unused-argument
 
@@ -23,9 +23,8 @@ def findPrediction(row):
     for j in range(0, columns - 1):
         params.append(row[j])
 
-    moduleName = f"outputs/rules/rules{epoch - 1}"
-    fp, pathname, description = imp.find_module(moduleName)
-    myrules = imp.load_module(moduleName, fp, pathname, description)
+    module_name = f"outputs/rules/rules{epoch - 1}"
+    myrules = load_module(module_name)
 
     # prediction = int(myrules.findDecision(params))
     prediction = myrules.findDecision(params)
@@ -81,9 +80,8 @@ def regressor(df, config, header, dataset_features, validation_df=None, process_
         # run data(i-1) and rules(i-1), save data1
 
         # dynamic import
-        moduleName = f"outputs/rules/rules{index - 1}"
-        fp, pathname, description = imp.find_module(moduleName)
-        myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+        module_name = f"outputs/rules/rules{index - 1}"
+        myrules = load_module(module_name)  # rules0
 
         models.append(myrules)
 
@@ -237,9 +235,8 @@ def classifier(df, config, header, dataset_features, validation_df=None, process
             # ----------------------------
 
             # dynamic import
-            moduleName = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
-            fp, pathname, description = imp.find_module(moduleName)
-            myrules = imp.load_module(moduleName, fp, pathname, description)  # rules0
+            module_name = "outputs/rules/rules-for-" + current_class + "-round-" + str(epoch)
+            myrules = load_module(module_name)  # rules0
 
             models.append(myrules)
 
diff --git a/chefboost/tuning/randomforest.py b/chefboost/tuning/randomforest.py
index 743a8f0..d6dfe6e 100644
--- a/chefboost/tuning/randomforest.py
+++ b/chefboost/tuning/randomforest.py
@@ -1,11 +1,11 @@
 import multiprocessing
 from contextlib import closing
-import imp  # pylint: disable=deprecated-module
 
 from tqdm import tqdm
 
 from chefboost.commons import functions
 from chefboost.training import Training
+from chefboost.commons.module import load_module
 
 # pylint: disable=unused-argument
 
@@ -31,8 +31,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N
 
         root = 1
 
-        moduleName = "outputs/rules/rule_" + str(i)
-        file = moduleName + ".py"
+        module_name = "outputs/rules/rule_" + str(i)
+        file = module_name + ".py"
 
         functions.createFile(file, header)
 
@@ -85,9 +85,8 @@ def apply(df, config, header, dataset_features, validation_df=None, process_id=N
     # -------------------------------
     # collect models for both serial and parallel here
     for i in range(0, num_of_trees):
-        moduleName = "outputs/rules/rule_" + str(i)
-        fp, pathname, description = imp.find_module(moduleName)
-        myrules = imp.load_module(moduleName, fp, pathname, description)
+        module_name = "outputs/rules/rule_" + str(i)
+        myrules = load_module(module_name)
         models.append(myrules)
 
     # -------------------------------