Updates

minor updates (loading bars etc)
Metalkiler · Jul 16, 2020 · b63b784 · b63b784
1 parent 7bf7f3a
commit b63b784
Show file tree

Hide file tree

Showing 10 changed files with 59 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -23,36 +23,24 @@ This method results in 689 binary inputs, which is much less than the 10690 bina
 
 It is possible to apply these transformations to specific columns only instead of the full dataset (follow the example).
 
+New Feature :
 
-# Installation
-
-## Stable Version
-To install this package please run the following command
-
-``` cmd
-pip install cane
-
-
-```
-## Beta Version
-
-Which in this version will contain pre-release versions of Cane that have new function which the stable version has not, and allow the users for their feedback and usage.
-
-BETA Version
 
 [x] - New function called multicolumn (for PCP and IDF only). This function aggregates two or more columns into a single one and applies the transformation to it. Afterwards, it maps the resulting transformation back onto the original (disaggregated) columns.
 
-More to come!
 
 
+# Installation
+
+## Stable Version
+To install this package, please run the following command:
 
 ``` cmd
-pip install cane==0.0.1.7.7b1
+pip install cane
 
 
 ```
 
-
 # Suggestions and feedback
 
 Any feedback will be appreciated.
@@ -104,7 +92,7 @@ dataH4 = cane.one_hot(df, column_prefix='column', n_coresJob=2
 
 
 
-#specific example with multicolumn BETA ONLY!
+#specific example with multicolumn
 x2 = [k for s in ([k] * n for k, n in [('a', 50),
                                        ('b', 10),
                                        ('c', 20),

diff --git a/cane/README.md b/cane/README.md
@@ -23,14 +23,22 @@ This method results in 689 binary inputs, which is much less than the 10690 bina
 
 It is possible to apply these transformations to specific columns only instead of the full dataset (follow the example).
 
+New Feature :
+
+
+[x] - New function called multicolumn (for PCP and IDF only). This function aggregates two or more columns into a single one and applies the transformation to it. Afterwards, it maps the resulting transformation back onto the original (disaggregated) columns.
+
+
 
 # Installation
 
+## Stable Version
 To install this package, please run the following command:
 
 ``` cmd
 pip install cane
 
+
 ```
 
 # Suggestions and feedback
@@ -84,8 +92,7 @@ dataH4 = cane.one_hot(df, column_prefix='column', n_coresJob=2
 
 
 
-
-#specific example with multicolumn 
+#specific example with multicolumn
 x2 = [k for s in ([k] * n for k, n in [('a', 50),
                                        ('b', 10),
                                        ('c', 20),
@@ -110,9 +117,6 @@ print("multicolumn idf \n",dataIDF2)
 
 
 
-
-
-
 #Time Measurement in 10 runs
 print("Time Measurement in 10 runs (unicore)")
 OT = timeit.timeit(lambda:cane.one_hot(df, column_prefix='column', n_coresJob=1),number = 10)

diff --git a/cane/build/lib/cane/__init__.py b/cane/build/lib/cane/__init__.py
@@ -19,6 +19,7 @@
 from pqdm.processes import pqdm
 from functools import partial
 import itertools
+from tqdm import tqdm
 
 
 def __pcp_single__(f, perc_inner=0.05, mergeCategoryinner="Others"):
@@ -90,11 +91,12 @@ def pcp(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others", n_coresJob=1,
 
 
 def pcp_multicolumn(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others",
-                    columns_use=None):
+                    columns_use=None, disableLoadBar=True):
     """
     Similarly to the normal PCP, this function merges the given columns and applies the PCP transformation to the merged data.
     Next, it applies the resulting transformation to each of the original (disaggregated) columns, so they all share the same mapping.
 
+    :param disableLoadBar: If True (default), no progress bar is shown; set to False to display one.
     :param columns_use: Specific columns to apply transformation.
     :param mergeCategory: Category for merging the data (by default "Others")
     :param dataset: dataset to transform
@@ -126,25 +128,21 @@ def pcp_multicolumn(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others",
 
         d = __pcp_single__(dfTesting, perc_inner=perc, mergeCategoryinner=mergeCategory)
         dic = {v: [i for i in np.unique(v)][0] for _, v in d.items()}
-        for column in columns_use:
+        for column in tqdm(columns_use, desc="Transformation", total=len(columns_use), disable=disableLoadBar):
             TransformedData[column] = TransformedData[column].map(dic)
             TransformedData[column] = TransformedData[column].fillna(mergeCategory)  # because of others
-        # dfFinal = pd.concat([i for i in d], axis=1)
-        # dfFinal.columns = columns_use
-        # dfFinal = pd.concat([dfFinal, TransformedData[TransformedData.columns.difference(columns_use, sort=False)]],
-        #                     axis=1,
-        #                     sort=True)
 
     return TransformedData
 
 
-def idf_multicolumn(dataset, columns_use=None):
+def idf_multicolumn(dataset, columns_use=None, disableLoadBar=True):
     """
     The Inverse Document Frequency (IDF) uses f(x)= log(n/f_x),
     where n is the length of x and f_x is the frequency of x.
     Next it will apply the transformation into the disaggregated columns sharing
     the transformation obtained previously
 
+    :param disableLoadBar: If True (default), no progress bar is shown; set to False to display one.
     :param columns_use: List of columns to use
     :param dataset: dataset to transform
 
@@ -170,7 +168,7 @@ def idf_multicolumn(dataset, columns_use=None):
         dfTesting = pd.Series([y for x in mergedColumn for y in x], name="X")
 
         d = __idf_single_dic__(dfTesting)
-        for column in columns_use:
+        for column in tqdm(columns_use, desc="Transformation", total=len(columns_use), disable=disableLoadBar):
             TransformedData[column] = TransformedData[column].replace(d)
     return TransformedData
 

diff --git a/cane/cane.egg-info/PKG-INFO b/cane/cane.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cane
-Version: 0.0.1.7.7
+Version: 2.0.1
 Summary: Cane - Categorical Attribute traNsformation Environment
 Home-page: https://github.com/Metalkiler/Cane-Categorical-Attribute-traNsformation-Environment
 Author: Luís Miguel Matos, Paulo Cortez, Rui Mendes
@@ -31,14 +31,22 @@ Description: # Cane - Categorical Attribute traNsformation Environment
 
         It is possible to apply these transformations to specific columns only instead of the full dataset (follow the example).
 
+        New Feature :
+
+
+        [x] - New function called multicolumn (for PCP and IDF only). This function will aggregate 2 or more columns into a single one and apply the transformation to it. Afterwards it will map the transformation obtained into the disaggregated columns.
+
+
 
         # Installation
 
+        ## Stable Version
         To install this package please run the following command
 
         ``` cmd
         pip install cane
 
+
         ```
 
         # Suggestions and feedback
@@ -92,8 +100,7 @@ Description: # Cane - Categorical Attribute traNsformation Environment
 
 
 
-
-        #specific example with multicolumn 
+        #specific example with multicolumn
         x2 = [k for s in ([k] * n for k, n in [('a', 50),
                                                ('b', 10),
                                                ('c', 20),
@@ -118,9 +125,6 @@ Description: # Cane - Categorical Attribute traNsformation Environment
 
 
 
-
-
-
         #Time Measurement in 10 runs
         print("Time Measurement in 10 runs (unicore)")
         OT = timeit.timeit(lambda:cane.one_hot(df, column_prefix='column', n_coresJob=1),number = 10)

diff --git a/cane/cane.egg-info/requires.txt b/cane/cane.egg-info/requires.txt
@@ -1,8 +1,9 @@
-bounded-pool-executor
-numpy
-pandas
-pqdm
-python-dateutil
-pytz
-tqdm
-typing-extensions
+bounded-pool-executor==0.0.3
+numpy==1.18.4
+pandas==1.0.4
+pqdm==0.1.0
+python-dateutil==2.8.1
+pytz==2020.1
+tqdm==4.46.0
+typing-extensions==3.7.4.2
+pqdm==0.1.0
diff --git a/cane/cane/__init__.py b/cane/cane/__init__.py
@@ -19,6 +19,7 @@
 from pqdm.processes import pqdm
 from functools import partial
 import itertools
+from tqdm import tqdm
 
 
 def __pcp_single__(f, perc_inner=0.05, mergeCategoryinner="Others"):
@@ -90,11 +91,12 @@ def pcp(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others", n_coresJob=1,
 
 
 def pcp_multicolumn(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others",
-                    columns_use=None):
+                    columns_use=None, disableLoadBar=True):
     """
     Similarly to the normal PCP, this function merges the given columns and applies the PCP transformation to the merged data.
     Next, it applies the resulting transformation to each of the original (disaggregated) columns, so they all share the same mapping.
 
+    :param disableLoadBar: If True (default), no progress bar is shown; set to False to display one.
     :param columns_use: Specific columns to apply transformation.
     :param mergeCategory: Category for merging the data (by default "Others")
     :param dataset: dataset to transform
@@ -126,25 +128,21 @@ def pcp_multicolumn(dataset=pd.DataFrame(), perc=0.05, mergeCategory="Others",
 
         d = __pcp_single__(dfTesting, perc_inner=perc, mergeCategoryinner=mergeCategory)
         dic = {v: [i for i in np.unique(v)][0] for _, v in d.items()}
-        for column in columns_use:
+        for column in tqdm(columns_use, desc="Transformation", total=len(columns_use), disable=disableLoadBar):
             TransformedData[column] = TransformedData[column].map(dic)
             TransformedData[column] = TransformedData[column].fillna(mergeCategory)  # because of others
-        # dfFinal = pd.concat([i for i in d], axis=1)
-        # dfFinal.columns = columns_use
-        # dfFinal = pd.concat([dfFinal, TransformedData[TransformedData.columns.difference(columns_use, sort=False)]],
-        #                     axis=1,
-        #                     sort=True)
 
     return TransformedData
 
 
-def idf_multicolumn(dataset, columns_use=None):
+def idf_multicolumn(dataset, columns_use=None, disableLoadBar=True):
     """
     The Inverse Document Frequency (IDF) uses f(x)= log(n/f_x),
     where n is the length of x and f_x is the frequency of x.
     Next it will apply the transformation into the disaggregated columns sharing
     the transformation obtained previously
 
+    :param disableLoadBar: If True (default), no progress bar is shown; set to False to display one.
     :param columns_use: List of columns to use
     :param dataset: dataset to transform
 
@@ -170,7 +168,7 @@ def idf_multicolumn(dataset, columns_use=None):
         dfTesting = pd.Series([y for x in mergedColumn for y in x], name="X")
 
         d = __idf_single_dic__(dfTesting)
-        for column in columns_use:
+        for column in tqdm(columns_use, desc="Transformation", total=len(columns_use), disable=disableLoadBar):
             TransformedData[column] = TransformedData[column].replace(d)
     return TransformedData
 

diff --git a/cane/dist/cane-2.0.1-py3-none-any.whl b/cane/dist/cane-2.0.1-py3-none-any.whl
diff --git a/cane/dist/cane-2.0.1.tar.gz b/cane/dist/cane-2.0.1.tar.gz
diff --git a/cane/req.txt b/cane/req.txt
@@ -1,8 +1,9 @@
-bounded-pool-executor
-numpy
-pandas
-pqdm
-python-dateutil
-pytz
-tqdm
-typing-extensions
+bounded-pool-executor==0.0.3
+numpy==1.18.4
+pandas==1.0.4
+pqdm==0.1.0
+python-dateutil==2.8.1
+pytz==2020.1
+tqdm==4.46.0
+typing-extensions==3.7.4.2
+pqdm==0.1.0
diff --git a/cane/setup.py b/cane/setup.py
@@ -8,7 +8,7 @@
 
 
 setuptools.setup(name='cane',
-                 version='0.0.1.7.7',
+                 version='2.0.1',
                  description='Cane - Categorical Attribute traNsformation Environment',
                  author='Luís Miguel Matos, Paulo Cortez, Rui Mendes',
                  license='MIT',