diff --git a/LICENSE b/LICENSE index c4a1257..c7159b0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2020 nikhil kala - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2020 nikhil kala + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..83adde0 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,17 @@ +MIT License +Copyright (c) 2020 Nikhil Kala +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
def categorical(
    dataframe=None,
    normalize=False,
    drop_columns=None,
    drop_na=False,
    target_columns=None,
    inplace=True,
):
    """
    Encode every (non-target) column of a DataFrame as integer class labels.

    Each remaining column is mapped to the labels 1..k, where k is the number
    of distinct values in that column and labels are assigned in order of
    first appearance.

    :rtype: Pandas DataFrame, or a tuple of two DataFrames
    :param dataframe: The input DataFrame (X) to encode.
    :param normalize: If True, every label is divided by the number of classes
        of its column, so values lie in (0, 1]. If False, labels are the
        integers 1..k. default: False
    :param drop_columns: Columns to remove before encoding. default: no columns
    :param drop_na: If True, rows containing NaN are dropped before encoding.
        default: False
    :param target_columns: Columns split off, unencoded, into a separate
        target DataFrame (Y). default: no columns
    :param inplace: If True (the default, matching the historical behaviour)
        the DataFrame passed in is modified and returned. If False, the work
        is done on a copy and the caller's DataFrame is left untouched.
    :return: ``(X, Y)`` if target_columns are provided, otherwise only the
        encoded input DataFrame ``X``.
    :raises ValueError: if ``dataframe`` is not a DataFrame or has no columns.
    """
    import pandas as pd

    if not isinstance(dataframe, pd.DataFrame):
        raise ValueError(
            'Type Error : Expects pd.DataFrame for the parameter -> "dataframe"'
        )
    if len(dataframe.columns) == 0:
        raise ValueError('Data Error : The parameter -> "dataframe" is empty')

    # None stands in for "no columns" so no mutable object is shared as a
    # default between calls.
    if drop_columns is None:
        drop_columns = []
    if target_columns is None:
        target_columns = []

    if not inplace:
        dataframe = dataframe.copy()

    # Remove the columns the caller marked as useless.
    dataframe.drop(columns=drop_columns, inplace=True)

    # Rows are dropped before the target frame is extracted so that X and Y
    # stay aligned row for row.
    if drop_na:
        dataframe.dropna(inplace=True)

    # Target columns are split off without any encoding.
    target_dataframe = pd.DataFrame(dataframe[target_columns])

    # len() rather than truthiness: target_columns may be an Index or array,
    # whose truth value is ambiguous.
    has_targets = len(target_columns) > 0
    if has_targets:
        dataframe.drop(columns=target_columns, inplace=True)

    for column in dataframe.columns:
        classes = dataframe[column].unique()
        labels = dict(zip(classes, range(1, len(classes) + 1)))
        dataframe[column] = dataframe[column].map(labels)

        # Scale labels from 1..k into (0, 1].
        if normalize:
            dataframe[column] = dataframe[column] / len(classes)

    if has_targets:
        return (dataframe, target_dataframe)
    return dataframe
# Classes}.', + author = 'Nikhil Kala', + author_email = 'nikhilkala8@gmail.com', + url = 'https://github.com/nikhilkala/Categorical-Encode', + download_url = 'https://github.com/nikhilkala/Categorical-Encode/archive/v0.1.tar.gz', + keywords = ['Machine Learning', 'Categorical Data', 'Encoding', 'Deep Learning','Pandas', 'DataFrame', 'Normalization'], + install_requires=[ + 'pandas', + ], + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Build Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8' + ], +)