From 27eecd36e127aa8579adc1c0f6efcbb4cf043d45 Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 14:27:13 +0200
Subject: [PATCH 1/8] [Faris] ignores local files

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ff8ef67..d65bc29 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@ wheels/
 .installed.cfg
 *.egg
 MANIFEST
+requirements.txt
 
 # PyInstaller
 #  Usually these files are written by a python script from a template
@@ -104,4 +105,7 @@ venv.bak/
 .mypy_cache/
 
 # PyCharm
-.idea/
\ No newline at end of file
+.idea/
+
+# vscode
+.vscode/
\ No newline at end of file

From ed36d2d3d8600a80cbdf357d59405f256cc83fbd Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 14:28:36 +0200
Subject: [PATCH 2/8] test utils: get_cols_features

---
 tests/utils_test.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 tests/utils_test.py

diff --git a/tests/utils_test.py b/tests/utils_test.py
new file mode 100644
index 0000000..f6e1dac
--- /dev/null
+++ b/tests/utils_test.py
@@ -0,0 +1,27 @@
+import unittest
+from causallift import utils
+import pandas as pd
+import numpy as np
+
+
+class UtilsTest(unittest.TestCase):
+
+    def setUp(self):
+        pass
+
+    def test_get_cols_features_should_return_feature_columns_excluding_default_non_feature(self):
+        df = pd.DataFrame(data=np.random.rand(3, 6), columns=['var1', 'var2', 'var3', 'Treatment', 'Outcome', 'Propensity'])
+
+        result = utils.get_cols_features(df)
+
+        self.assertEqual(['var1', 'var2', 'var3'], result)
+
+    def test_get_cols_features_should_return_feature_columns_excluding_non_default_non_feature(self):
+        df = pd.DataFrame(data=np.random.rand(3, 6), columns=['var1', 'var2', 'var3', 'MarketedTo', 'Outcome', 'Probability'])
+
+        result = utils.get_cols_features(df, non_feature_cols=['MarketedTo', 'Outcome', 'Probability'])
+
+        self.assertEqual(['var1', 'var2', 'var3'], result)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file

From dc0db6d167ffe71bd5c80a8b78b18aa93f5c9995 Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 15:49:38 +0200
Subject: [PATCH 3/8] [Faris] test concat methods

---
 tests/utils_test.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/utils_test.py b/tests/utils_test.py
index f6e1dac..19babdf 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -23,5 +23,23 @@ def test_get_cols_features_should_return_feature_columns_excluding_non_default_n
 
         self.assertEqual(['var1', 'var2', 'var3'], result)
 
+    def test_concat_train_test_should_concatenate_both_sets_into_series_with_keys(self):
+        train_df = pd.DataFrame(data=np.random.rand(3, 3), columns=['var1', 'var2', 'var3'])
+        test_df = pd.DataFrame(data=np.random.rand(3, 3), columns=['var1', 'var2', 'var3'])
+
+        result = utils.concat_train_test(train=train_df, test=test_df)
+
+        pd.testing.assert_series_equal(pd.Series(train_df), result.xs('train'))
+        pd.testing.assert_series_equal(pd.Series(test_df), result.xs('test'))
+
+    def test_concat_train_test_df_should_concatenate_both_sets_into_frames_with_keys(self):
+        train_df = pd.DataFrame(data=np.random.rand(3, 3), columns=['var1', 'var2', 'var3'])
+        test_df = pd.DataFrame(data=np.random.rand(3, 3), columns=['var1', 'var2', 'var3'])
+
+        result = utils.concat_train_test_df(train=train_df, test=test_df)
+
+        pd.testing.assert_frame_equal(train_df, result.xs('train'))
+        pd.testing.assert_frame_equal(test_df, result.xs('test'))
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 3970d774345766091f483195829f08f494664be1 Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 16:29:40 +0200
Subject: [PATCH 4/8] [Faris] test len_t and len_o

---
 tests/utils_test.py | 54 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/utils_test.py b/tests/utils_test.py
index 19babdf..a0b6f01 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -2,6 +2,7 @@
 from causallift import utils
 import pandas as pd
 import numpy as np
+import random
 
 
 class UtilsTest(unittest.TestCase):
@@ -41,5 +42,58 @@ def test_concat_train_test_df_should_concatenate_both_sets_into_frames_with_keys(
         pd.testing.assert_frame_equal(train_df, result.xs('train'))
         pd.testing.assert_frame_equal(test_df, result.xs('test'))
 
+    def test_len_t_should_return_the_number_of_records_where_treatment_equals_1(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['Treatment'] == 1].shape[0]
+        result = utils.len_t(df)
+
+        self.assertEqual(length, result)
+
+    def test_len_t_should_return_the_number_of_records_where_treatment_equals_0(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['Treatment'] == 0].shape[0]
+        result = utils.len_t(df, treatment=0)
+
+        self.assertEqual(length, result)
+
+    def test_len_t_should_return_the_number_of_records_where_treatment_equals_0_and_treatment_col_is_not_default(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['MarketedTo'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['MarketedTo'] == 0].shape[0]
+        result = utils.len_t(df, treatment=0, col_treatment='MarketedTo')
+
+        self.assertEqual(length, result)
+
+    def test_len_o_should_return_the_number_of_records_where_outcome_is_1(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['Outcome'] == 1].shape[0]
+        result = utils.len_o(df)
+
+        self.assertEqual(length, result)
+
+    def test_len_o_should_return_the_number_of_records_where_outcome_is_0(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['Outcome'] == 0].shape[0]
+        result = utils.len_o(df, outcome=0)
+
+        self.assertEqual(length, result)
+
+    def test_len_o_should_return_the_number_of_records_where_outcome_equals_0_and_outcome_col_is_not_default(self):
+        df = pd.DataFrame(data=np.random.rand(6, 2), columns=['var1', 'var2'])
+        df['Result'] = [random.sample(range(2), 1)[0] for i in range(6)]
+
+        length = df[df['Result'] == 0].shape[0]
+        result = utils.len_o(df, outcome=0, col_outcome='Result')
+
+        self.assertEqual(length, result)
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 844569d37aad62bf08c86ca05e499eb8931dee59 Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 19:19:55 +0200
Subject: [PATCH 5/8] [Faris] test len_to

---
 tests/utils_test.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/utils_test.py b/tests/utils_test.py
index a0b6f01..c4929ab 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -95,5 +95,37 @@ def test_len_o_should_return_the_number_of_records_where_outcome_equals_0_and_ou
         result = utils.len_o(df, outcome=0, col_outcome='Result')
 
         self.assertEqual(length, result)
+
+    def test_len_to_should_return_the_number_of_records_where_outcome_and_treatment_is_1(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(12)]
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        length = df[(df['Treatment'] == 1) & (df['Outcome'] == 1)].shape[0]
+        result = utils.len_to(df)
+
+        self.assertEqual(length, result)
+
+    def test_len_to_should_return_the_number_of_records_where_outcome_and_treatment_are_different(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(12)]
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        length = df[(df['Treatment'] == 1) & (df['Outcome'] == 0)].shape[0]
+        result = utils.len_to(df, outcome=0)
+
+        self.assertEqual(length, result)
+
+    def test_len_to_should_return_the_number_of_records_where_outcome_and_treatment_are_different_with_custom_column_names(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['result'] = [random.sample(range(2), 1)[0] for i in range(12)]
+        df['marketed_to'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        length = df[(df['marketed_to'] == 1) & (df['result'] == 0)].shape[0]
+        result = utils.len_to(df, outcome=0, col_outcome='result', col_treatment='marketed_to')
+
+        self.assertEqual(length, result)
+
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From e15b963111e09e42da87376b2f359d2ddf79b7e7 Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 19:58:44 +0200
Subject: [PATCH 6/8] [Faris] test methods that compute fractions of treatment and outcome

---
 tests/utils_test.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tests/utils_test.py b/tests/utils_test.py
index c4929ab..aafcd07 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -126,6 +126,41 @@ def test_len_to_should_return_the_number_of_records_where_outcome_and_treatment_
 
         self.assertEqual(length, result)
 
+    def test_treatment_fraction_should_compute_percentage_of_treated(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        value = len(df[df['Treatment'] == 1])/len(df)
+        result = utils.treatment_fraction_(df)
+
+        self.assertEqual(value, result)
+
+    def test_treatment_fraction_should_compute_percentage_of_treated_with_custom_name(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['marketed'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        value = len(df[df['marketed'] == 1])/len(df)
+        result = utils.treatment_fraction_(df, col_treatment='marketed')
+
+        self.assertEqual(value, result)
+
+    def test_outcome_fraction_should_compute_percentage_of_positive_outcome(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        value = len(df[df['Outcome'] == 1])/len(df)
+        result = utils.outcome_fraction_(df)
+
+        self.assertEqual(value, result)
+
+    def test_outcome_fraction_should_compute_percentage_of_positive_outcome_with_custom_name(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['result'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        value = len(df[df['result'] == 1])/len(df)
+        result = utils.outcome_fraction_(df, col_outcome='result')
+
+        self.assertEqual(value, result)
 
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 72f416f27215844860ad926e286a0b4e5aee210d Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 20:57:04 +0200
Subject: [PATCH 7/8] [Faris] test uplift gain for 'sure thing'. Also fixed default column names

---
 causallift/utils.py |  4 ++--
 tests/utils_test.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/causallift/utils.py b/causallift/utils.py
index 39286fe..c4ef3ce 100644
--- a/causallift/utils.py
+++ b/causallift/utils.py
@@ -45,8 +45,8 @@ def outcome_fraction_(df, col_outcome='Outcome'):
 
 def overall_uplift_gain_(df, treatment=1.0, outcome=1.0, col_treatment='Treatment', col_outcome='Outcome'):
     overall_uplift_gain = \
-        (len_to(df, col_treatment='Treatment', col_outcome='Outcome') / len_t(df, col_treatment=col_treatment)) \
-        - (len_to(df, 0, 1, col_treatment='Treatment', col_outcome='Outcome') / len_t(df, 0,
+        (len_to(df, col_treatment=col_treatment, col_outcome=col_outcome) / len_t(df, col_treatment=col_treatment)) \
+        - (len_to(df, 0, 1, col_treatment=col_treatment, col_outcome=col_outcome) / len_t(df, 0,
                                                                                           col_treatment=col_treatment))
     return overall_uplift_gain
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index aafcd07..2ef55a6 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -162,5 +162,36 @@ def test_outcome_fraction_should_compute_percentage_of_positive_outcome_with_cus
 
         self.assertEqual(value, result)
 
+    def test_overall_uplift_gain_should_compute_uplift_for_sure_things(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Outcome'] = [random.sample(range(2), 1)[0] for i in range(12)]
+        df['Treatment'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        no_treated_positive_outcome = df[(df['Treatment'] == 1) & (df['Outcome'] == 1)].shape[0]
+        no_not_treated_positive_outcome = df[(df['Treatment'] == 0) & (df['Outcome'] == 1)].shape[0]
+        no_treated = df[df['Treatment'] == 1].shape[0]
+        no_not_treated = df[df['Treatment'] == 0].shape[0]
+
+        gain = (no_treated_positive_outcome/no_treated) - (no_not_treated_positive_outcome/no_not_treated)
+        result = utils.overall_uplift_gain_(df)
+
+        self.assertEqual(gain, result)
+
+    def test_overall_uplift_gain_should_compute_uplift_for_sure_things_with_custom_column_names(self):
+        df = pd.DataFrame(data=np.random.rand(12, 2), columns=['var1', 'var2'])
+        df['Result'] = [random.sample(range(2), 1)[0] for i in range(12)]
+        df['Contacted'] = [random.sample(range(2), 1)[0] for i in range(12)]
+
+        no_treated_positive_outcome = df[(df['Contacted'] == 1) & (df['Result'] == 1)].shape[0]
+        no_not_treated_positive_outcome = df[(df['Contacted'] == 0) & (df['Result'] == 1)].shape[0]
+        no_treated = df[df['Contacted'] == 1].shape[0]
+        no_not_treated = df[df['Contacted'] == 0].shape[0]
+
+        gain = (no_treated_positive_outcome/no_treated) - (no_not_treated_positive_outcome/no_not_treated)
+        result = utils.overall_uplift_gain_(df, col_treatment='Contacted', col_outcome='Result')
+
+        self.assertEqual(gain, result)
+
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 85262c2cf9d4f9534756e2c538b4dcb912fc72cd Mon Sep 17 00:00:00 2001
From: Faris Osman
Date: Sun, 28 Apr 2019 21:24:00 +0200
Subject: [PATCH 8/8] [Faris] imports missing numpy module
---
 causallift/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/causallift/utils.py b/causallift/utils.py
index c4ef3ce..68c3d1d 100644
--- a/causallift/utils.py
+++ b/causallift/utils.py
@@ -2,6 +2,7 @@
 """ Utility functions """
 
 import pandas as pd
+import numpy as np
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
 from sklearn.metrics import confusion_matrix
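
Reviewer note (illustration only, not part of the patch series): the causallift/utils.py change in PATCH 7/8 matters because overall_uplift_gain_ previously hard-coded the 'Treatment' and 'Outcome' column names inside its len_to() calls, so the col_treatment and col_outcome arguments were silently ignored. The snippet below is a minimal, self-contained sketch of the quantity that function computes (the conversion rate among treated rows minus the conversion rate among untreated rows), using a hypothetical toy frame with non-default column names, mirroring the arithmetic asserted in the new tests:

    import pandas as pd

    # Hypothetical toy data with non-default column names.
    df = pd.DataFrame({
        'Contacted': [1, 1, 1, 1, 0, 0, 0, 0],  # treatment flag
        'Result':    [1, 0, 1, 1, 1, 0, 0, 0],  # outcome flag
    })

    treated = df[df['Contacted'] == 1]
    control = df[df['Contacted'] == 0]

    # Uplift gain: P(outcome == 1 | treated) - P(outcome == 1 | untreated).
    gain = (treated['Result'] == 1).mean() - (control['Result'] == 1).mean()
    print(gain)  # 0.75 - 0.25 = 0.5

With the patched code, utils.overall_uplift_gain_(df, col_treatment='Contacted', col_outcome='Result') should agree with this value; before the fix, the len_to() calls always looked up the literal 'Treatment' and 'Outcome' columns regardless of the arguments passed, which is exactly what the new custom-column-name test guards against.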