From e7d64dab82864deed4b3a3f16f5007fec0b5202f Mon Sep 17 00:00:00 2001
From: vecxoz <vecxoz@gmail.com>
Date: Mon, 12 Aug 2019 16:07:25 +0300
Subject: [PATCH 1/3] v0.4.0

---
 .travis.yml                                   |   7 +-
 CHANGELOG.md                                  |  53 ++++++++
 LICENSE.txt                                   |   2 +-
 PY2.md                                        |  22 ++++
 README.md                                     |  15 +--
 .../04_sklearn_api_regression_pipeline.ipynb  |   4 +-
 setup.py                                      |  13 +-
 tests/test_func_api_classification_binary.py  |  68 ++++++++++
 ...test_func_api_classification_multiclass.py |  69 +++++++++++
 tests/test_func_api_regression.py             | 117 +++++++++++++++++-
 tests/test_sklearn_api_regression.py          |  68 ++++++++++
 vecstack/__init__.py                          |   2 +-
 vecstack/core.py                              |  32 ++---
 vecstack/coresk.py                            |  27 ++--
 14 files changed, 454 insertions(+), 45 deletions(-)
 create mode 100644 CHANGELOG.md
 create mode 100644 PY2.md

diff --git a/.travis.yml b/.travis.yml
index 6eeab9e..30d59e9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,10 +2,14 @@
 # check if .travis.yml is valid: http://lint.travis-ci.org/
 # to skip build for given commit put [ci skip] or [skip ci] in commit message
 
+# required for Python >= 3.7
+dist: xenial
+
 language: python
 
-# versions supported by scikit-learn
+# versions supported by scikit-learn and some additional versions
 python:
+  - "3.7"
   - "3.6"
   - "3.5"
   - "3.4"
@@ -16,6 +20,7 @@ branches:
   only:
   - master
   - dev
+  - py2
 
 install:
   - pip install numpy
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..c3afb9b
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,53 @@
+# Changelog
+
+### v0.4.0 -- August 12, 2019
+
+Since v0.4.0 vecstack provides official support for Python 3.5 and higher only,  
+but still there is unofficial support for Python 2.7 and Python 3.4.  
+Please see [details](https://github.com/vecxoz/vecstack/blob/master/PY2.md).
+
+Scikit-learn API:
+* Fixed #31. `sklearn.externals.six` deprecation
+* Fixed #29. Out-of-memory in `np.random.choice` for very large ranges
+
+Functional API:
+* Feature #18. Added support for N-dimensional input. Useful for convolutional nets.
+* Added aliases for `mode` parameter values which correspond to respective `variant` parameter values of `StackingTransformer`:
+  * 'oof_pred_bag' == 'A'
+  * 'oof_pred' == 'B'
+
+### v0.3.0 -- April 6, 2018
+
+Introducing Scikit-learn API: `StackingTransformer`
+
+* Standard transformer class with `fit` and `transform` methods
+* Compatible with `Pipeline` and `FeatureUnion`
+
+### v0.2.2 -- February 23, 2018
+
+* Fixed #5. Wrong behavior during sparse matrix processing
+* Improved input data validation
+* Improved sparse matrix processing
+
+### v0.2.1 -- January 24, 2018 -- Maintenance release
+
+* Minor modifications
+
+### v0.2 -- January 23, 2018
+
+New features:
+
+* Classification with probabilities
+* Modes: compute only what you need (only OOF, only predictions, both, etc.)
+* Save resulting arrays and log with model parameters
+
+### v0.1 -- November 22, 2016 -- Initial release
+
+Features:
+
+* Functional stacking API
+* Regression
+* Classification with class labels
+* Ordinary and stratified k-fold split
+* User-defined metric
+* User-defined transformations for target and prediction
diff --git a/LICENSE.txt b/LICENSE.txt
index 5dd5141..38165f0 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,7 +1,7 @@
 MIT License
 
 Vecstack. Python package for stacking (machine learning technique)
-Copyright (c) 2016-2018 Igor Ivanov
+Copyright (c) 2016-2019 Igor Ivanov
 Email: vecxoz@gmail.com
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/PY2.md b/PY2.md
new file mode 100644
index 0000000..94c5945
--- /dev/null
+++ b/PY2.md
@@ -0,0 +1,22 @@
+### Python 3.x
+
+Since v0.4.0 vecstack provides official support for Python 3.5 and higher only,  
+but still there is unofficial support for Python 2.7 and Python 3.4. See details below.  
+
+The reason for these changes is the global movement toward Python 3.x.  
+Vecstack depends on scikit-learn which has already stopped support for Python < 3.5.  
+Scikit-learn v0.20.x is the last version supporting Python 2.7 and Python 3.4.  
+Vecstack follows this direction as well.  
+Please see [python3statement.org](https://python3statement.org/) for more details.  
+
+### Unofficial support for Python 2.7 and Python 3.4
+
+You can still install and run latest vecstack on Python 2.7 and Python 3.4.  
+NOTE. It will require legacy versions of the following packages:   
+* numpy<1.17
+* scipy<1.3
+* scikit-learn>=0.18,<0.21
+There is a dedicated branch on GitHub called `py2` with appropriate requirements in `setup.py`.  
+Installation:  
+
+`pip install https://github.com/vecxoz/vecstack/archive/py2.zip`
\ No newline at end of file
diff --git a/README.md b/README.md
index c49b925..585a1d5 100644
--- a/README.md
+++ b/README.md
@@ -22,10 +22,10 @@ Convenient way to automate OOF computation, prediction and bagging using any num
     * Predict [class labels or probabilities](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L119) in classification task
     * Apply any [user-defined metric](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L124)
     * Apply any [user-defined transformations](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L87) for target and prediction
-    * Python 2, Python 3
+    * Python 3.5 and higher, [unofficial support for Python 2.7 and 3.4](https://github.com/vecxoz/vecstack/blob/master/PY2.md)
     * Win, Linux, Mac
     * [MIT license](https://github.com/vecxoz/vecstack/blob/master/LICENSE.txt)
-    * Depends on **numpy**, **scipy**, **scikit-learn>=18.0**
+    * Depends on **numpy**, **scipy**, **scikit-learn>=0.18**
 
 # Get started
 * [FAQ](https://github.com/vecxoz/vecstack#stacking-faq)
@@ -292,14 +292,15 @@ Stacking API comparison:
 | Estimator implementation restrictions | Must have only `fit` and `predict` (`predict_proba`) methods | Must be fully scikit-learn compatible |
 | `NaN` and `inf` in input data | Allowed | Not allowed |
 | Can automatically save OOF and log in files | Yes | No |
+| Input dimensionality (`X_train`, `X_test`) | Arbitrary | 2-D |
     
 ### 21. How do parameters of `stacking` function and `StackingTransformer` correspond?
 
-| **stacking function**   | **StackingTransformer**           |
-|-------------------------|-----------------------------------|
-| `models=[Ridge()]`      | `estimators=[('ridge', Ridge())]` |
-| `mode='oof_pred_bag'`   | `variant='A'`                     |
-| `mode='oof_pred'`       | `variant='B'`                     |
+| **stacking function**                 | **StackingTransformer**           |
+|---------------------------------------|-----------------------------------|
+| `models=[Ridge()]`                    | `estimators=[('ridge', Ridge())]` |
+| `mode='oof_pred_bag'` (alias `'A'`)   | `variant='A'`                     |
+| `mode='oof_pred'` (alias `'B'`)       | `variant='B'`                     |
     
 ### 22. Why Scikit-learn API was implemented as transformer and not predictor?
 
diff --git a/examples/04_sklearn_api_regression_pipeline.ipynb b/examples/04_sklearn_api_regression_pipeline.ipynb
index 9a63df8..f29f146 100644
--- a/examples/04_sklearn_api_regression_pipeline.ipynb
+++ b/examples/04_sklearn_api_regression_pipeline.ipynb
@@ -512,7 +512,7 @@
    "source": [
     "# 2. Pipeline\n",
     "\n",
-    "StackingTransformer is fully scikit-learn compatible so we can easily implement **arbitrary number of stacking layers** using Pipeline\n"
+    "StackingTransformer is fully scikit-learn compatible so we can easily implement **arbitrary number of stacking levels** using Pipeline\n"
    ]
   },
   {
@@ -535,7 +535,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# If we have several stacking layers our Pipeline steps would be:\n",
+    "# If we have several stacking levels our Pipeline steps would be:\n",
     "# steps = [('stack_L1', stack_L1),\n",
     "#          ('stack_L2', stack_L2),\n",
     "#          ('stack_L99', stack_L99), # :-)\n",
diff --git a/setup.py b/setup.py
index 7358454..96289c1 100644
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,15 @@
 
 from setuptools import setup
 
+long_desc = '''
+Python package for stacking (stacked generalization) featuring lightweight functional API and fully compatible scikit-learn API.
+Convenient way to automate OOF computation, prediction and bagging using any number of models.
+'''
+
 setup(name='vecstack',
-      version='0.3.0',
+      version='0.4.0',
       description='Python package for stacking (machine learning technique)',
-      long_description='Convenient way to automate OOF computation, prediction and bagging using any number of models',
+      long_description=long_desc,
       classifiers=[
           'License :: OSI Approved :: MIT License',
           'Operating System :: MacOS',
@@ -13,12 +18,10 @@
           'Operating System :: POSIX',
           'Operating System :: Unix',
           'Programming Language :: Python',
-          'Programming Language :: Python :: 2',
-          'Programming Language :: Python :: 2.7',
           'Programming Language :: Python :: 3',
-          'Programming Language :: Python :: 3.4',
           'Programming Language :: Python :: 3.5',
           'Programming Language :: Python :: 3.6',
+          'Programming Language :: Python :: 3.7',
           'Topic :: Scientific/Engineering',
           'Topic :: Scientific/Engineering :: Artificial Intelligence',
           'Topic :: Scientific/Engineering :: Information Analysis',
diff --git a/tests/test_func_api_classification_binary.py b/tests/test_func_api_classification_binary.py
index 5cc1a0e..6bc6419 100644
--- a/tests/test_func_api_classification_binary.py
+++ b/tests/test_func_api_classification_binary.py
@@ -63,6 +63,33 @@
 y_test = y[ind_test]
 
 
+# Create 4-dim data
+np.random.seed(42)
+X_train_4d = np.random.normal(size=(400, 8, 8, 3))
+X_test_4d = np.random.normal(size=(100, 8, 8, 3))
+y_train_4d = np.random.randint(n_classes, size=400)
+
+# Reshape 4-dim to 2-dim
+X_train_4d_unrolled = X_train_4d.reshape(X_train_4d.shape[0], -1)
+X_test_4d_unrolled = X_test_4d.reshape(X_test_4d.shape[0], -1)
+
+#------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
+
+class LogisticRegressionUnrolled(LogisticRegression):
+    """
+    For tests related to N-dim input.
+    Estimator accepts an N-dim array and reshapes it to a 2-dim array
+    """
+    def fit(self, X, y):
+        return super(LogisticRegressionUnrolled, self).fit(X.reshape(X.shape[0], -1), y)
+
+    def predict(self, X):
+        return super(LogisticRegressionUnrolled, self).predict(X.reshape(X.shape[0], -1))
+
+    def predict_proba(self, X):
+        return super(LogisticRegressionUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
 
@@ -775,7 +802,48 @@ def test_oof_pred_bag_mode_proba_2_models(self):
         
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
+
+    def test_N_dim_input(self):
+        """
+        This is `test_oof_pred_bag_mode` function with `LogisticRegressionUnrolled` estimator
+        """
+        S_test_temp = np.zeros((X_test_4d_unrolled.shape[0], n_folds))
+        # Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
+        kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train_4d_unrolled, y_train_4d)):
+            # Split data and target
+            X_tr = X_train_4d_unrolled[tr_index]
+            y_tr = y_train_4d[tr_index]
+            X_te = X_train_4d_unrolled[te_index]
+            y_te = y_train_4d[te_index]
+            model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+            _ = model.fit(X_tr, y_tr)
+            S_test_temp[:, fold_counter] = model.predict(X_test_4d_unrolled)
+        S_test_1 = st.mode(S_test_temp, axis = 1)[0]
     
+        model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+        S_train_1 = cross_val_predict(model, X_train_4d_unrolled, y = y_train_4d, cv = n_folds,
+            n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
+
+        models = [LogisticRegressionUnrolled(random_state=0, solver='liblinear', multi_class='ovr')]
+        S_train_2, S_test_2 = stacking(models, X_train_4d, y_train_4d, X_test_4d,
+            regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
+            mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
+
+        # Load OOF from file
+        # Normally if cleaning is performed there is only one .npy file at given moment
+        # But if we have no cleaning there may be more than one file so we take the latest
+        file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
+        S = np.load(file_name)
+        S_train_3 = S[0]
+        S_test_3 = S[1]
+
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
+
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
 
diff --git a/tests/test_func_api_classification_multiclass.py b/tests/test_func_api_classification_multiclass.py
index f9e5fa5..8e3c3a5 100644
--- a/tests/test_func_api_classification_multiclass.py
+++ b/tests/test_func_api_classification_multiclass.py
@@ -60,6 +60,33 @@
 y_test = y[ind_test]
 
 
+# Create 4-dim data
+np.random.seed(42)
+X_train_4d = np.random.normal(size=(400, 8, 8, 3))
+X_test_4d = np.random.normal(size=(100, 8, 8, 3))
+y_train_4d = np.random.randint(n_classes, size=400)
+
+# Reshape 4-dim to 2-dim
+X_train_4d_unrolled = X_train_4d.reshape(X_train_4d.shape[0], -1)
+X_test_4d_unrolled = X_test_4d.reshape(X_test_4d.shape[0], -1)
+
+#------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
+
+class LogisticRegressionUnrolled(LogisticRegression):
+    """
+    For tests related to N-dim input.
+    Estimator accepts an N-dim array and reshapes it to a 2-dim array
+    """
+    def fit(self, X, y):
+        return super(LogisticRegressionUnrolled, self).fit(X.reshape(X.shape[0], -1), y)
+
+    def predict(self, X):
+        return super(LogisticRegressionUnrolled, self).predict(X.reshape(X.shape[0], -1))
+
+    def predict_proba(self, X):
+        return super(LogisticRegressionUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
 
@@ -772,7 +799,49 @@ def test_oof_pred_bag_mode_proba_2_models(self):
         
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
+
+
+    def test_N_dim_input(self):
+        """
+        This is `test_oof_pred_bag_mode` function with `LogisticRegressionUnrolled` estimator
+        """
+        S_test_temp = np.zeros((X_test_4d_unrolled.shape[0], n_folds))
+        # Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
+        kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train_4d_unrolled, y_train_4d)):
+            # Split data and target
+            X_tr = X_train_4d_unrolled[tr_index]
+            y_tr = y_train_4d[tr_index]
+            X_te = X_train_4d_unrolled[te_index]
+            y_te = y_train_4d[te_index]
+            model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+            _ = model.fit(X_tr, y_tr)
+            S_test_temp[:, fold_counter] = model.predict(X_test_4d_unrolled)
+        S_test_1 = st.mode(S_test_temp, axis = 1)[0]
     
+        model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+        S_train_1 = cross_val_predict(model, X_train_4d_unrolled, y = y_train_4d, cv = n_folds,
+            n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
+
+        models = [LogisticRegressionUnrolled(random_state=0, solver='liblinear', multi_class='ovr')]
+        S_train_2, S_test_2 = stacking(models, X_train_4d, y_train_4d, X_test_4d,
+            regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
+            mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
+
+        # Load OOF from file
+        # Normally if cleaning is performed there is only one .npy file at given moment
+        # But if we have no cleaning there may be more than one file so we take the latest
+        file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
+        S = np.load(file_name)
+        S_train_3 = S[0]
+        S_test_3 = S[1]
+
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
+
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
 
diff --git a/tests/test_func_api_regression.py b/tests/test_func_api_regression.py
index ed6c047..ca4432c 100644
--- a/tests/test_func_api_regression.py
+++ b/tests/test_func_api_regression.py
@@ -136,7 +136,34 @@ def test_oof_pred_mode(self):
         
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
-        
+
+    def test_B_mode(self):
+        """ 'B' is alias for 'oof_pred' """
+        model = LinearRegression()
+        S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
+            n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
+        _ = model.fit(X_train, y_train)
+        S_test_1 = model.predict(X_test).reshape(-1, 1)
+
+        models = [LinearRegression()]
+        S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
+            regression = True, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
+            mode = 'B', random_state = 0, verbose = 0)
+
+        # Load OOF from file
+        # Normally if cleaning is performed there is only one .npy file at given moment
+        # But if we have no cleaning there may be more than one file so we take the latest
+        file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
+        S = np.load(file_name)
+        S_train_3 = S[0]
+        S_test_3 = S[1]
+
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
+
     def test_oof_mode(self):
 
         model = LinearRegression()
@@ -226,6 +253,44 @@ def test_oof_pred_bag_mode(self):
         
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
+
+    def test_A_mode(self):
+        """ 'A' is alias for 'oof_pred_bag' """
+        S_test_temp = np.zeros((X_test.shape[0], n_folds))
+        kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
+            # Split data and target
+            X_tr = X_train[tr_index]
+            y_tr = y_train[tr_index]
+            X_te = X_train[te_index]
+            y_te = y_train[te_index]
+            model = LinearRegression()
+            _ = model.fit(X_tr, y_tr)
+            S_test_temp[:, fold_counter] = model.predict(X_test)
+        S_test_1 = np.mean(S_test_temp, axis = 1).reshape(-1, 1)
+
+        model = LinearRegression()
+        S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
+            n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
+
+        models = [LinearRegression()]
+        S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
+            regression = True, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
+            mode = 'A', random_state = 0, verbose = 0)
+
+        # Load OOF from file
+        # Normally if cleaning is performed there is only one .npy file at given moment
+        # But if we have no cleaning there may be more than one file so we take the latest
+        file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
+        S = np.load(file_name)
+        S_train_3 = S[0]
+        S_test_3 = S[1]
+
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
     
     def test_pred_bag_mode(self):
         
@@ -811,9 +876,9 @@ def test_oof_pred_mode_no_get_params(self):
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
     
-    #-------------------------------------------------------------------------------
+    #--------------------------------------------------------------------------
     # Test inconsistent data shape or type
-    #-------------------------------------------------------------------------------
+    #--------------------------------------------------------------------------
     def test_inconsistent_data(self):
         # nan or inf in y
         y_train_nan = y_train.copy()
@@ -828,7 +893,53 @@ def test_inconsistent_data(self):
         # X_train and y_train shape nismatch
         assert_raises(ValueError, stacking, [LinearRegression()], 
                       X_train, y_train[:10], X_test)
+
+    #---------------------------------------------------------------------------
+    # Test small input
+    #---------------------------------------------------------------------------
+
+    def test_small_input(self):
+        """
+        This is `test_oof_pred_bag_mode` with small input data
+        Train: 20 examples
+        Test: 10 examples
+        """
+        S_test_temp = np.zeros((X_test[:10].shape[0], n_folds))
+        kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train[:20], y_train[:20])):
+            # Split data and target
+            X_tr = X_train[:20][tr_index]
+            y_tr = y_train[:20][tr_index]
+            X_te = X_train[:20][te_index]
+            y_te = y_train[:20][te_index]
+            model = LinearRegression()
+            _ = model.fit(X_tr, y_tr)
+            S_test_temp[:, fold_counter] = model.predict(X_test[:10])
+        S_test_1 = np.mean(S_test_temp, axis = 1).reshape(-1, 1)
     
+        model = LinearRegression()
+        S_train_1 = cross_val_predict(model, X_train[:20], y = y_train[:20], cv = n_folds, 
+            n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
+
+        models = [LinearRegression()]
+        S_train_2, S_test_2 = stacking(models, X_train[:20], y_train[:20], X_test[:10], 
+            regression = True, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
+            mode = 'oof_pred_bag', random_state = 0, verbose = 0)
+
+        # Load OOF from file
+        # Normally if cleaning is performed there is only one .npy file at given moment
+        # But if we have no cleaning there may be more than one file so we take the latest
+        file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
+        S = np.load(file_name)
+        S_train_3 = S[0]
+        S_test_3 = S[1]
+
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
+
 #-------------------------------------------------------------------------------
 #-------------------------------------------------------------------------------
 
diff --git a/tests/test_sklearn_api_regression.py b/tests/test_sklearn_api_regression.py
index 9b12184..d7e9bad 100644
--- a/tests/test_sklearn_api_regression.py
+++ b/tests/test_sklearn_api_regression.py
@@ -1640,6 +1640,24 @@ def test_check_identity(self):
         assert_raises(AssertionError, assert_raises, ValueError, stack._check_identity, X_train)
         # ``X`` argument is INcorrect - MUST raise
         assert_raises(ValueError, stack._check_identity, 5)
+
+    # -------------------------------------------------------------------------
+    # Test ``_random_choice`` method
+    # -------------------------------------------------------------------------
+    def test_random_choice(self):
+        # fit then transform
+        estimators = [('lr', LinearRegression())]
+        stack = StackingTransformer(estimators, regression=True,
+                                    n_folds=n_folds, shuffle=False,
+                                    variant='B', random_state=0,
+                                    verbose=0)
+        stack = stack.fit(X_train, y_train)
+        # very large range - must NOT raise
+        assert_raises(AssertionError, assert_raises, ValueError, stack._random_choice, 19999999999, 1000)
+        # ``size`` is less than ``n`` - must NOT raise
+        assert_raises(AssertionError, assert_raises, ValueError, stack._random_choice, 200, 20)
+        # ``size`` is greater than ``n`` - MUST raise
+        assert_raises(ValueError, stack._random_choice, 20, 200)
         
     # -------------------------------------------------------------------------
     # Test case where X_test has the same shape as X_train
@@ -1678,6 +1696,56 @@ def test_x_test_has_same_shape(self):
         assert_array_equal(S_train_1, S_train_3)
         assert_array_equal(S_test_1, S_test_3)
 
+    # -------------------------------------------------------------------------
+    # Test small input
+    # -------------------------------------------------------------------------
+
+    def test_small_input(self):
+        """
+        This is `test_variant_A` with small input data
+        Train: 20 examples
+        Test: 10 examples
+        """
+        S_test_temp = np.zeros((X_test[:10].shape[0], n_folds))
+        kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+        for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train[:20], y_train[:20])):
+            # Split data and target
+            X_tr = X_train[:20][tr_index]
+            y_tr = y_train[:20][tr_index]
+            # X_te = X_train[:20][te_index]
+            # y_te = y_train[:20][te_index]
+            model = LinearRegression()
+            model = model.fit(X_tr, y_tr)
+            S_test_temp[:, fold_counter] = model.predict(X_test[:10])
+        S_test_1 = np.mean(S_test_temp, axis=1).reshape(-1, 1)
+
+        model = LinearRegression()
+        S_train_1 = cross_val_predict(model, X_train[:20], y=y_train[:20],
+                                      cv=n_folds, n_jobs=1, verbose=0,
+                                      method='predict').reshape(-1, 1)
+
+        # fit then transform
+        estimators = [('lr', LinearRegression())]
+        stack = StackingTransformer(estimators, regression=True,
+                                    n_folds=n_folds, shuffle=False,
+                                    variant='A', random_state=0,
+                                    verbose=0)
+        stack = stack.fit(X_train[:20], y_train[:20])
+        S_train_2 = stack.transform(X_train[:20])
+        S_test_2 = stack.transform(X_test[:10])
+
+        # fit_transform
+        # also check refitting already fitted transformer
+        S_train_3 = stack.fit_transform(X_train[:20], y_train[:20])
+        S_test_3 = stack.transform(X_test[:10])
+
+        # compare
+        assert_array_equal(S_train_1, S_train_2)
+        assert_array_equal(S_test_1, S_test_2)
+
+        assert_array_equal(S_train_1, S_train_3)
+        assert_array_equal(S_test_1, S_test_3)
+
 # -----------------------------------------------------------------------------
 # -----------------------------------------------------------------------------
 
diff --git a/vecstack/__init__.py b/vecstack/__init__.py
index 3ecea4e..3ce96cb 100644
--- a/vecstack/__init__.py
+++ b/vecstack/__init__.py
@@ -38,7 +38,7 @@
 
 __author__ = 'Igor Ivanov > kaggle.com/vecxoz'
 __license__ = 'MIT'
-__version__ = '0.3.0'
+__version__ = '0.4.0'
 
 __all__ = ['stacking', 'StackingTransformer']
 
diff --git a/vecstack/core.py b/vecstack/core.py
index 8c1bb4b..5deaf67 100644
--- a/vecstack/core.py
+++ b/vecstack/core.py
@@ -145,13 +145,13 @@ def stacking(models, X_train, y_train, X_test,
         task with probabilities model must return probabilities 
         for each class (i.e. two columns).
         
-    X_train : numpy array or sparse matrix of shape [n_train_samples, n_features]
+    X_train : numpy array or sparse matrix of N-dim shape, e.g. 2-dim [n_train_samples, n_features]
         Training data
     
     y_train : numpy 1d array
         Target values
         
-    X_test : numpy array or sparse matrix of shape [n_test_samples, n_features]
+    X_test : numpy array or sparse matrix of N-dim shape, e.g. 2-dim [n_test_samples, n_features]
         Test data
         
     sample_weight : numpy array of shape [n_train_samples]
@@ -186,11 +186,11 @@ def stacking(models, X_train, y_train, X_test,
             respective backward transformation like numpy.expm1.
         Look at description of parameter transform_target
         
-    mode: str, default 'oof_pred_bag'
+    mode: str, default 'oof_pred_bag' (alias 'A')
         Note: for detailes see terminology below
         'oof' - return only oof
-        'oof_pred' - return oof and pred
-        'oof_pred_bag' - return oof and bagged pred
+        'oof_pred' (alias 'B') - return oof and pred
+        'oof_pred_bag' (alias 'A') - return oof and bagged pred
         'pred' - return pred only
         'pred_bag' - return bagged pred only
         Terminology:
@@ -427,7 +427,7 @@ def your_metric(y_true, y_pred):
     # <regression>
     regression = bool(regression)
     # If wrong <mode>
-    if mode not in ['pred', 'pred_bag', 'oof', 'oof_pred', 'oof_pred_bag']:
+    if mode not in ['pred', 'pred_bag', 'oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
         raise ValueError('Parameter <mode> must be set properly')
     # <needs_proba>
     needs_proba = bool(needs_proba)
@@ -511,7 +511,7 @@ def your_metric(y_true, y_pred):
     #---------------------------------------------------------------------------
     # Create empty numpy arrays for OOF
     #---------------------------------------------------------------------------
-    if mode in ['oof_pred', 'oof_pred_bag']:
+    if mode in ['oof_pred', 'B', 'oof_pred_bag', 'A']:
         S_train = np.zeros(( X_train.shape[0], len(models) * n_classes ))
         S_test = np.zeros(( X_test.shape[0], len(models) * n_classes ))
     elif mode in ['oof']:
@@ -542,7 +542,7 @@ def your_metric(y_true, y_pred):
             print(model_str)
             
         # Create empty numpy array, which will contain temporary predictions for test set made in each fold
-        if mode in ['pred_bag', 'oof_pred_bag']:
+        if mode in ['pred_bag', 'oof_pred_bag', 'A']:
             S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
         
         # Create empty array to store scores for each fold (to find mean)
@@ -551,7 +551,7 @@ def your_metric(y_true, y_pred):
         #-----------------------------------------------------------------------
         # Loop across folds
         #-----------------------------------------------------------------------
-        if mode in ['pred_bag', 'oof', 'oof_pred', 'oof_pred_bag']:
+        if mode in ['pred_bag', 'oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
             for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
                 # Split data and target
                 X_tr = X_train[tr_index]
@@ -572,11 +572,11 @@ def your_metric(y_true, y_pred):
                 model = clone(model, safe=False)
                 
                 # Fit 1-st level model
-                if mode in ['pred_bag', 'oof', 'oof_pred', 'oof_pred_bag']:
+                if mode in ['pred_bag', 'oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
                     _ = model_action(model, X_tr, y_tr, None, sample_weight = sample_weight_tr, action = 'fit', transform = transform_target)
                     
                 # Predict out-of-fold part of train set
-                if mode in ['oof', 'oof_pred', 'oof_pred_bag']:
+                if mode in ['oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
                     if 'predict_proba' == action:
                         col_slice_model = slice(model_counter * n_classes, model_counter * n_classes + n_classes)
                     else:
@@ -584,7 +584,7 @@ def your_metric(y_true, y_pred):
                     S_train[te_index, col_slice_model] = model_action(model, None, None, X_te, action = action, transform = transform_pred)
                     
                 # Predict full test set in each fold
-                if mode in ['pred_bag', 'oof_pred_bag']:
+                if mode in ['pred_bag', 'oof_pred_bag', 'A']:
                     if 'predict_proba' == action:
                         col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
                     else:
@@ -592,7 +592,7 @@ def your_metric(y_true, y_pred):
                     S_test_temp[:, col_slice_fold] = model_action(model, None, None, X_test, action = action, transform = transform_pred)
                         
                 # Compute scores
-                if mode in ['oof', 'oof_pred', 'oof_pred_bag']:
+                if mode in ['oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
                     if save_dir is not None or verbose > 0:
                         score = metric(y_te, S_train[te_index, col_slice_model])
                         scores = np.append(scores, score)
@@ -603,7 +603,7 @@ def your_metric(y_true, y_pred):
                         print(fold_str)
                 
         # Compute mean or mode of predictions for test set in bag modes
-        if mode in ['pred_bag', 'oof_pred_bag']:
+        if mode in ['pred_bag', 'oof_pred_bag', 'A']:
             if 'predict_proba' == action:
                 # Here we copute means of probabilirties for each class
                 for class_id in range(n_classes):
@@ -615,7 +615,7 @@ def your_metric(y_true, y_pred):
                     S_test[:, model_counter] = st.mode(S_test_temp, axis = 1)[0].ravel()
             
         # Compute scores: mean + std and full
-        if mode in ['oof', 'oof_pred', 'oof_pred_bag']:
+        if mode in ['oof', 'oof_pred', 'B', 'oof_pred_bag', 'A']:
             if save_dir is not None or verbose > 0:
                 sep_str = '    ----'
                 mean_str = '    MEAN:     [%.8f] + [%.8f]' % (np.mean(scores), np.std(scores))
@@ -630,7 +630,7 @@ def your_metric(y_true, y_pred):
                 print(full_str)
                 
         # Fit model on full train set and predict test set
-        if mode in ['pred', 'oof_pred']:
+        if mode in ['pred', 'oof_pred', 'B']:
             if verbose > 0:
                 print('    Fitting on full train set...\n')
             _ = model_action(model, X_train, y_train, None, sample_weight = sample_weight, action = 'fit', transform = transform_target)
diff --git a/vecstack/coresk.py b/vecstack/coresk.py
index 2a90e92..712f3c1 100644
--- a/vecstack/coresk.py
+++ b/vecstack/coresk.py
@@ -54,7 +54,7 @@
 from sklearn.metrics import mean_absolute_error
 from sklearn.metrics import accuracy_score
 from sklearn.metrics import log_loss
-from sklearn.externals import six
+from sklearn.metrics import mean_squared_error
 
 # -----------------------------------------------------------------------------
 # -----------------------------------------------------------------------------
@@ -853,7 +853,7 @@ def _estimator_action(self, estimator, X_train, y_train, X_test,
 
     def _random_choice(self, n, size, bound=2**30):
         """
-        Memory efficient (but slower) version of np.random.choice
+        Memory-efficient substitute for np.random.choice without replacement
 
         Parameters:
         ===========
@@ -870,12 +870,21 @@ def _random_choice(self, n, size, bound=2**30):
         ========
         ids : 1d numpy array of shape (size, ) dtype=np.int32
         """
-        ids = []
-        while len(ids) < size:
-            rnd = np.random.randint(min(bound, n))
-            if rnd not in ids:
-                ids.append(rnd)
-        return np.array(ids, dtype=np.int32)
+        try:
+            if n < size:
+                raise ValueError('Drawing without replacement: '
+                                 '``n`` cannot be less than ``size``')
+
+            ids = []
+            while len(ids) < size:
+                rnd = np.random.randint(min(bound, n))
+                if rnd not in ids:
+                    ids.append(rnd)
+            return np.array(ids, dtype=np.int32)
+
+        except Exception:
+            raise ValueError('Internal error. '
+                             'Please save traceback and inform developers.')
 
     # -------------------------------------------------------------------------
     # -------------------------------------------------------------------------
@@ -946,7 +955,7 @@ def _get_params(self, attr, deep=True):
             return out
         out.update(estimators)
         for name, estimator in estimators:
-            for key, value in six.iteritems(estimator.get_params(deep=True)):
+            for key, value in iter(estimator.get_params(deep=True).items()):
                 out['%s__%s' % (name, key)] = value
         return out
 

From 65937dfa3f1c64da4e0134876c4a9d53766e3d14 Mon Sep 17 00:00:00 2001
From: vecxoz <vecxoz@gmail.com>
Date: Mon, 12 Aug 2019 17:12:55 +0300
Subject: [PATCH 2/3] allow_pickle=True for np.load in tests

---
 tests/test_func_api_classification_binary.py  | 32 +++++++--------
 ...test_func_api_classification_multiclass.py | 32 +++++++--------
 tests/test_func_api_regression.py             | 40 +++++++++----------
 3 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/tests/test_func_api_classification_binary.py b/tests/test_func_api_classification_binary.py
index 6bc6419..efabc24 100644
--- a/tests/test_func_api_classification_binary.py
+++ b/tests/test_func_api_classification_binary.py
@@ -145,7 +145,7 @@ def test_oof_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -171,7 +171,7 @@ def test_oof_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -197,7 +197,7 @@ def test_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -236,7 +236,7 @@ def test_oof_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -273,7 +273,7 @@ def test_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -304,7 +304,7 @@ def test_oof_pred_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -330,7 +330,7 @@ def test_oof_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -356,7 +356,7 @@ def test_pred_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -398,7 +398,7 @@ def test_oof_pred_bag_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
         
@@ -447,7 +447,7 @@ def test_pred_bag_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
     
@@ -491,7 +491,7 @@ def test_oof_pred_bag_mode_shuffle(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -609,7 +609,7 @@ def test_oof_pred_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -672,7 +672,7 @@ def test_oof_pred_bag_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -712,7 +712,7 @@ def test_oof_pred_mode_proba_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -784,7 +784,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
         
@@ -834,7 +834,7 @@ def test_N_dim_input(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
diff --git a/tests/test_func_api_classification_multiclass.py b/tests/test_func_api_classification_multiclass.py
index 8e3c3a5..f74eb4d 100644
--- a/tests/test_func_api_classification_multiclass.py
+++ b/tests/test_func_api_classification_multiclass.py
@@ -142,7 +142,7 @@ def test_oof_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -168,7 +168,7 @@ def test_oof_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -194,7 +194,7 @@ def test_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -233,7 +233,7 @@ def test_oof_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -270,7 +270,7 @@ def test_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -301,7 +301,7 @@ def test_oof_pred_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -327,7 +327,7 @@ def test_oof_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -353,7 +353,7 @@ def test_pred_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -395,7 +395,7 @@ def test_oof_pred_bag_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
         
@@ -444,7 +444,7 @@ def test_pred_bag_mode_proba(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
     
@@ -488,7 +488,7 @@ def test_oof_pred_bag_mode_shuffle(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -606,7 +606,7 @@ def test_oof_pred_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -669,7 +669,7 @@ def test_oof_pred_bag_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -709,7 +709,7 @@ def test_oof_pred_mode_proba_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -781,7 +781,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
         
@@ -832,7 +832,7 @@ def test_N_dim_input(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
diff --git a/tests/test_func_api_regression.py b/tests/test_func_api_regression.py
index ca4432c..05bf9f3 100644
--- a/tests/test_func_api_regression.py
+++ b/tests/test_func_api_regression.py
@@ -127,7 +127,7 @@ def test_oof_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -154,7 +154,7 @@ def test_B_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -180,7 +180,7 @@ def test_oof_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -206,7 +206,7 @@ def test_pred_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -244,7 +244,7 @@ def test_oof_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -282,7 +282,7 @@ def test_A_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -318,7 +318,7 @@ def test_pred_bag_mode(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -352,7 +352,7 @@ def test_oof_pred_mode_sample_weight_one(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -387,7 +387,7 @@ def test_oof_pred_mode_sample_weight_random(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -419,7 +419,7 @@ def test_oof_pred_mode_transformations(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -557,7 +557,7 @@ def test_oof_pred_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -619,7 +619,7 @@ def test_oof_pred_bag_mode_2_models(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -650,7 +650,7 @@ def test_oof_pred_mode_sparse_csr(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -677,7 +677,7 @@ def test_oof_pred_mode_sparse_csc(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -704,7 +704,7 @@ def test_oof_pred_mode_sparse_coo(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -735,7 +735,7 @@ def test_oof_pred_mode_sparse_csr_coo(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -766,7 +766,7 @@ def test_oof_pred_mode_sparse_csr_dense(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -795,7 +795,7 @@ def test_oof_mode_xtest_is_none(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -866,7 +866,7 @@ def test_oof_pred_mode_no_get_params(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 
@@ -930,7 +930,7 @@ def test_small_input(self):
         # Normally if cleaning is performed there is only one .npy file at given moment
         # But if we have no cleaning there may be more then one file so we take the latest
         file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
-        S = np.load(file_name)
+        S = np.load(file_name, allow_pickle=True)
         S_train_3 = S[0]
         S_test_3 = S[1]
 

From 55f9cad303076c625cd3e1125b1875ca9f4a3b88 Mon Sep 17 00:00:00 2001
From: Igor Ivanov <vecxoz@gmail.com>
Date: Mon, 12 Aug 2019 18:31:16 +0300
Subject: [PATCH 3/3] Improve text formatting

---
 PY2.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/PY2.md b/PY2.md
index 94c5945..da2c587 100644
--- a/PY2.md
+++ b/PY2.md
@@ -16,7 +16,8 @@ NOTE. It will require legacy versions of the following packages:
 * numpy<1.17
 * scipy<1.3
 * scikit-learn>=0.18,<0.21
+
 There is a dedicated branch on GitHub called `py2` with appropriate requirements in `setup.py`.  
 Installation:  
 
-`pip install https://github.com/vecxoz/vecstack/archive/py2.zip`
\ No newline at end of file
+`pip install https://github.com/vecxoz/vecstack/archive/py2.zip`