Techtonique
diff --git a/examples/hist_genboost_classifier.py
Lines changed: 55 additions & 0 deletions b/examples/hist_genboost_classifier.py
Lines changed: 55 additions & 0 deletions
diff --git a/examples/hist_genboost_regressor.py
Lines changed: 45 additions & 0 deletions b/examples/hist_genboost_regressor.py
Lines changed: 45 additions & 0 deletions
diff --git a/examples/lazy_histbooster_classification.py
Lines changed: 29 additions & 0 deletions b/examples/lazy_histbooster_classification.py
Lines changed: 29 additions & 0 deletions
diff --git a/examples/lazy_histbooster_regression.py
Lines changed: 59 additions & 0 deletions b/examples/lazy_histbooster_regression.py
Lines changed: 59 additions & 0 deletions
diff --git a/mlsauce/booster/_booster_classifier.py
Lines changed: 40 additions & 4 deletions b/mlsauce/booster/_booster_classifier.py
Lines changed: 40 additions & 4 deletions
@@ -0,0 +1,55 @@
+import numpy as np 
+from sklearn.datasets import load_digits, load_breast_cancer, load_wine, load_iris
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
+from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
+from sklearn.kernel_ridge import KernelRidge
+from sklearn.linear_model import LinearRegression
+from time import time
+from os import chdir  # only referenced by the commented-out chdir(wd) below
+from sklearn import metrics
+import os 
+# Example script: fit ms.HistGenericBoostingClassifier on the breast cancer data, timing fit and score.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+print(os.path.relpath(os.path.dirname(__file__)))  # directory of this script, relative to the current working directory
+
+#wd="/workspace/mlsauce/mlsauce/examples"
+#
+#chdir(wd)
+
+import mlsauce as ms
+
+# NOTE(review): this section was labelled "ridge", but the base learner used below is an ExtraTreeRegressor
+
+print("\n")
+print("GenericBoosting Decision tree -----")
+print("\n")
+
+print("\n")
+print("breast_cancer data -----")
+
+# data 1: breast cancer features (X) and target (y)
+breast_cancer = load_breast_cancer()
+X = breast_cancer.data
+y = breast_cancer.target
+# split data into training set and test set (test_size=0.2 holds out 20% of the rows)
+np.random.seed(15029)  # train_test_split gets no random_state below, so the split depends on this global NumPy seed
+X_train, X_test, y_train, y_test = train_test_split(X, y, 
+                                                    test_size=0.2)
+
+clf = ExtraTreeRegressor()  # base learner handed to the boosting classifier below
+clf2 = LinearRegression()  # NOTE(review): never used in this script
+
+obj = ms.HistGenericBoostingClassifier(clf)
+print(obj.get_params())
+start = time()
+obj.fit(X_train, y_train)
+print(time()-start)  # wall-clock seconds spent in fit
+start = time()
+print(obj.score(X_test, y_test))  # score on the held-out test set
+print(time()-start)  # wall-clock seconds spent in score
+
+print(obj.obj['loss'])  # 'loss' entry of the fitted estimator's `obj` mapping; presumably the training-loss history (TODO confirm)
+
+print(obj.obj['fit_obj_i'])  # 'fit_obj_i' entry of the same mapping; presumably the per-iteration fitted base learners (TODO confirm)
+
@@ -0,0 +1,45 @@
+import subprocess
+import sys
+import os 
+# Example script: fit ms.HistGenericBoostingRegressor on the diabetes data and print the test RMSE.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+# Runs "<this interpreter> -m pip install matplotlib" on every execution; check_call raises CalledProcessError if pip fails.
+subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])
+
+import mlsauce as ms
+import numpy as np 
+import matplotlib.pyplot as plt  # NOTE(review): plt is not used anywhere in this script
+from sklearn.datasets import load_diabetes, fetch_california_housing
+from sklearn.linear_model import Ridge, LinearRegression
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
+from sklearn.tree import ExtraTreeRegressor
+from time import time
+from os import chdir
+from sklearn import metrics
+
+
+print("\n")
+print("diabetes data -----")
+
+regr = ExtraTreeRegressor()  # base learner handed to the boosting regressor below
+
+diabetes = load_diabetes()
+X = diabetes.data
+y = diabetes.target
+# split data into training set and test set (test_size=0.2 holds out 20% of the rows)
+np.random.seed(15029)  # train_test_split gets no random_state below, so the split depends on this global NumPy seed
+X_train, X_test, y_train, y_test = train_test_split(X, y, 
+                                                    test_size=0.2)
+
+
+obj = ms.HistGenericBoostingRegressor(regr)
+print(obj.get_params())
+start = time()
+obj.fit(X_train, y_train)
+print(time()-start)  # wall-clock seconds spent in fit
+start = time()
+print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))  # root mean squared error on the test set
+print(time()-start)  # wall-clock seconds spent predicting and computing the RMSE
+print(obj.obj['loss'])  # 'loss' entry of the fitted estimator's `obj` mapping; presumably the training-loss history (TODO confirm)
+
@@ -0,0 +1,29 @@
+import os 
+import mlsauce as ms 
+from sklearn.datasets import load_breast_cancer, load_iris, load_wine, load_digits
+from sklearn.model_selection import train_test_split
+from time import time
+# Example script: run ms.LazyBoostingClassifier with hist=True on several sklearn classification datasets.
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
+#load_models = [load_breast_cancer, load_iris, load_wine, load_digits]
+load_models = [load_breast_cancer, load_iris, load_wine]  # dataset loader functions (despite the name, not models)
+#load_models = [load_digits]
+
+for model in load_models: 
+
+    data = model()  # call the loader to get the dataset
+    X = data.data
+    y= data.target
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 13)  # 20% held out, fixed seed
+
+    clf = ms.LazyBoostingClassifier(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                    custom_metric=None, preprocess=False)
+
+    start = time()
+    models, predictioms = clf.fit(X_train, X_test, y_train, y_test, hist=True)  # second return value ("predictioms", sic) is not used
+    print(f"\nElapsed: {time() - start} seconds\n")
+
+    print(models)  # first value returned by fit; presumably a per-model results table (TODO confirm)
+
@@ -0,0 +1,59 @@
+import os 
+import mlsauce as ms
+import numpy as np
+from sklearn.datasets import load_diabetes
+from sklearn.datasets import fetch_california_housing
+from sklearn.model_selection import train_test_split
+# Example script: run ms.LazyBoostingRegressor on three regression datasets (diabetes, California housing, OpenML boston).
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+# --- 1) diabetes data; fit() is called without the hist argument ---
+data = load_diabetes()
+X = data.data
+y= data.target
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 123)  # 20% held out, fixed seed
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                custom_metric=None, preprocess=True)
+models, predictioms = regr.fit(X_train, X_test, y_train, y_test)  # second return value ("predictioms", sic) is not used
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)  # NOTE(review): assigned but never read in this script
+print(models)
+# --- 2) California housing, first 1000 rows only; fit() is called with hist=True ---
+data = fetch_california_housing()
+X = data.data[0:1000,:]
+y= data.target[0:1000]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 123)
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, 
+                                custom_metric=None, preprocess=True)
+models, predictioms = regr.fit(X_train, X_test, y_train, y_test, hist=True)
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
+print(models)
+
+# --- 3) OpenML "boston" data as pandas objects; fit() is called with hist=True ---
+from sklearn.datasets import fetch_openml
+
+# Load the dataset from OpenML
+boston = fetch_openml(name='boston', version=1, as_frame=True)  # as_frame=True: data/target come back as pandas objects (hence .head() below)
+
+# Get the features and target
+X = boston.data
+y = boston.target
+
+# Display the first few rows
+print(X.head())
+print(y.head())
+
+np.random.seed(1509)  # train_test_split gets no random_state below, so the split depends on this global NumPy seed
+X_train, X_test, y_train, y_test = train_test_split(X, y, 
+                                                    test_size=0.2)
+
+X_train = X_train.astype(np.float64)  # cast features and targets to float64 before fitting
+X_test = X_test.astype(np.float64)
+y_train = y_train.astype(np.float64)
+y_test = y_test.astype(np.float64)
+
+regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
+                                custom_metric=None, preprocess=True)
+models, predictioms = regr.fit(X_train, X_test, y_train, y_test, hist=True)
+model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
+print(models)
@@ -11,7 +11,7 @@
     from . import _boosterc as boosterc
 except ImportError:
     import _boosterc as boosterc
-from ..utils import cluster, check_and_install
+from ..utils import cluster, check_and_install, get_histo_features
 
 
 class LSBoostClassifier(BaseEstimator, ClassifierMixin):
@@ -83,6 +83,12 @@ class LSBoostClassifier(BaseEstimator, ClassifierMixin):
         weights_distr: str
             distribution of weights for constructing the model's hidden layer;
             currently 'uniform', 'gaussian'
+        
+        hist: bool
+            indicates whether histogram features are used or not (default is False)
+        
+        bins: int or str
+            number of bins for histogram features (same as numpy.histogram, default is 'auto')
 
     Examples:
 
@@ -307,9 +313,14 @@ def __init__(
         degree=None,
         weights_distr="uniform",
         base_model=None,
+        hist=False,
+        bins="auto",
     ):
 
         self.base_model = base_model
+        self.hist = hist
+        self.bins = bins
+        self.hist_bins_ = None
 
         if n_clusters > 0:
             assert clustering_method in (
@@ -391,6 +402,14 @@ def fit(self, X, y, **kwargs):
 
         if isinstance(X, pd.DataFrame):
             X = X.values
+        
+        if self.hist == True:
+            X, self.hist_bins_ = get_histo_features(X)
+        
+        if isinstance(y, pd.Series):
+            y = y.values.ravel()
+        else:
+            y = y.ravel()
 
         if self.degree is not None:
             assert isinstance(self.degree, int), "`degree` must be an integer"
@@ -433,7 +452,8 @@ def fit(self, X, y, **kwargs):
             obj=self.base_model,
         )
 
-        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
+        self.classes_ = np.unique(y)  # for compatibility with sklearn
+        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
         self.n_estimators = self.obj["n_estimators"]
         return self
 
@@ -476,6 +496,9 @@ def predict_proba(self, X, **kwargs):
         if isinstance(X, pd.DataFrame):
             X = X.values
 
+        if self.hist == True:
+            X = get_histo_features(X, bins=self.hist_bins_)
+
         if self.degree is not None:
             X = self.poly_.transform(X)
 
@@ -543,7 +566,8 @@ def update(self, X, y, eta=0.9):
             )
 
         self.obj = boosterc.update_booster(
-            self.obj, np.asarray(X, order="C"), np.asarray(y, order="C"), eta
+            self.obj, np.asarray(X, order="C"), 
+            np.asarray(y, order="C").ravel(), eta
         )
 
         return self
@@ -621,6 +645,12 @@ class GenericBoostingClassifier(LSBoostClassifier):
         weights_distr: str
             distribution of weights for constructing the model's hidden layer;
             currently 'uniform', 'gaussian'
+        
+        hist: bool
+            indicates whether histogram features are used or not (default is False)
+        
+        bins: int or str
+            number of bins for histogram features (same as numpy.histogram, default is 'auto')
 
     """
 
@@ -647,8 +677,14 @@ def __init__(
         cluster_scaling="standard",
         degree=None,
         weights_distr="uniform",
+        hist=False,
+        bins="auto",
     ):
         self.base_model = base_model
+        self.hist = hist
+        self.bins = bins
+        self.hist_bins_ = None
+
         super().__init__(
             n_estimators=n_estimators,
             learning_rate=learning_rate,
@@ -671,4 +707,4 @@ def __init__(
             degree=degree,
             weights_distr=weights_distr,
             base_model=self.base_model,
-        )
+        )