Merge pull request #11 from oislen/dev

New Unittests for Utilities
oislen · Oct 5, 2024 · 03b5bf8 · 03b5bf8
2 parents d467b99 + 800ddd7
commit 03b5bf8
Show file tree

Hide file tree

Showing 5 changed files with 259 additions and 0 deletions.
diff --git a/scripts/unittests/utilities/test_gen_country_codes_map.py b/scripts/unittests/utilities/test_gen_country_codes_map.py
@@ -0,0 +1,45 @@
+import unittest
+import os
+import sys
+import numpy as np
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_country_codes_map import gen_country_codes_map
+
+np.random.seed(cons.unittest_seed)  # pin NumPy's global RNG to the shared unittest seed
+
+exp_country_codes_map = {  # expected result: integer code -> two-letter code (presumably ISO 3166-1 numeric -> alpha-2; confirm)
+    804: 'UA', 250: 'FR', 724: 'ES', 752: 'SE', 276: 'DE', 246: 'FI', 578: 'NO', 616: 'PL', 380: 'IT', 826: 'GB', 642: 'RO', 
+    112: 'BY', 300: 'GR', 100: 'BG', 352: 'IS', 620: 'PT', 203: 'CZ', 208: 'DK', 348: 'HU', 688: 'RS', 40: 'AT', 372: 'IE', 
+    440: 'LT', 428: 'LV', 191: 'HR', 70: 'BA', 703: 'SK', 233: 'EE', 528: 'NL', 756: 'CH', 498: 'MD', 56: 'BE', 8: 'AL', 
+    807: 'MK', 705: 'SI', 499: 'ME', 196: 'CY', 442: 'LU', 234: 'FO', 20: 'AD', 470: 'MT', 438: 'LI', 831: 'GG', 674: 'SM', 
+    292: 'GI', 492: 'MC', 336: 'VA'
+    }
+
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]  # keep the text after cons.fpath_repo_dir and prefix it with '.'
+obs_country_codes_map = gen_country_codes_map(fpath_countrieseurope=fpath_countrieseurope)  # observed result, computed once at import time
+
+class Test_gen_country_codes_dict(unittest.TestCase):
+    """Check the observed country-code map against the hard-coded expected map."""
+
+    def setUp(self):
+        # Both fixtures are module-level objects built once when the module is imported.
+        self.exp_country_codes_map, self.obs_country_codes_map = exp_country_codes_map, obs_country_codes_map
+
+    def test_type(self):
+        exp_type = type(self.exp_country_codes_map)
+        obs_type = type(self.obs_country_codes_map)
+        self.assertEqual(exp_type, obs_type)
+
+    def test_len(self):
+        exp_len = len(self.exp_country_codes_map)
+        obs_len = len(self.obs_country_codes_map)
+        self.assertEqual(exp_len, obs_len)
+
+    def test_keys(self):
+        # Keys are compared as lists, so the check is order-sensitive.
+        exp_keys = [*self.exp_country_codes_map.keys()]
+        obs_keys = [*self.obs_country_codes_map.keys()]
+        self.assertEqual(exp_keys, obs_keys)
+
+    def test_values(self):
+        # Values are compared as lists, so the check is order-sensitive.
+        exp_values = [*self.exp_country_codes_map.values()]
+        obs_values = [*self.obs_country_codes_map.values()]
+        self.assertEqual(exp_values, obs_values)
+
+
+if __name__ == "__main__":  # run the suite only when this file is executed directly
+    unittest.main()
diff --git a/scripts/unittests/utilities/test_gen_obj_idhash_series.py b/scripts/unittests/utilities/test_gen_obj_idhash_series.py
@@ -0,0 +1,70 @@
+import unittest
+import os
+import sys
+import random
+import numpy as np
+import pandas as pd
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+
+random.seed(cons.unittest_seed)  # seed both RNGs before the project imports below
+np.random.seed(cons.unittest_seed)
+
+from utilities.gen_random_entity_counts import gen_random_entity_counts
+from utilities.gen_obj_idhash_series import gen_obj_idhash_series
+from objects.User import User
+from objects.Device import Device
+
+start_date = cons.unittest_registration_start_date
+end_date = cons.unittest_registration_end_date
+n_user_ids = cons.unittest_n_entities
+fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]  # each path below: text after cons.fpath_repo_dir, prefixed with '.'
+fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
+fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
+fpath_smartphones = '.' + cons.fpath_smartphones.split(cons.fpath_repo_dir)[1]
+
+random.seed(cons.unittest_seed)  # re-seed (same seed as above) right before the fixtures are generated
+np.random.seed(cons.unittest_seed)
+
+# create the user object from the unittest constants and data file paths
+user_object = User(n_user_ids=n_user_ids, start_date=start_date, end_date=end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)
+# generate random entity counts for the user object
+random_entity_counts = gen_random_entity_counts(user_obj=user_object)
+# create the device object, sized by the sum of the n_devices column
+device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum(), fpath_smartphones=fpath_smartphones)
+# copy the counts object and build the observed id-hash series from its n_devices column
+user_data = random_entity_counts.copy()
+obs_obj_idhash_series = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices'])
+exp_obj_idhash_series = pd.Series([['2ff23757073a0735'], ['73d2fd828c1fd115'], ['2fc83030d4f37f76', '20f0fba2565dd55c', '257aa14d0bef04bc'], ['f232f0f0bbdcd452']])  # hard-coded expectation; presumably tied to cons.unittest_seed - regenerate if the seed or inputs change
+
+class Test_gen_idhash_cnt_dict(unittest.TestCase):
+    """Check the observed id-hash series against the hard-coded expected series.
+
+    NOTE(review): the fixtures come from gen_obj_idhash_series, while the class
+    name refers to gen_idhash_cnt_dict - consider renaming the class.
+    """
+
+    def setUp(self):
+        # Both fixtures are module-level objects built once when the module is imported.
+        self.obs_obj_idhash_series, self.exp_obj_idhash_series = obs_obj_idhash_series, exp_obj_idhash_series
+
+    def test_type(self):
+        obs_type = type(self.obs_obj_idhash_series)
+        exp_type = type(self.exp_obj_idhash_series)
+        self.assertEqual(obs_type, exp_type)
+
+    def test_shape(self):
+        obs_shape = self.obs_obj_idhash_series.shape
+        exp_shape = self.exp_obj_idhash_series.shape
+        self.assertEqual(obs_shape, exp_shape)
+
+    def test_dtypes(self):
+        obs_dtypes = self.obs_obj_idhash_series.dtypes
+        exp_dtypes = self.exp_obj_idhash_series.dtypes
+        self.assertEqual(obs_dtypes, exp_dtypes)
+
+    def test_isnull(self):
+        # Null masks must agree position by position.
+        same_nulls = self.obs_obj_idhash_series.isnull() == self.exp_obj_idhash_series.isnull()
+        self.assertTrue(same_nulls.all().all())
+
+    def test_notnull(self):
+        # Non-null masks must agree position by position.
+        same_notnulls = self.obs_obj_idhash_series.notnull() == self.exp_obj_idhash_series.notnull()
+        self.assertTrue(same_notnulls.all().all())
+
+    def test_object(self):
+        # Flatten the per-row lists so individual hashes are compared element-wise.
+        obs_flat = self.obs_obj_idhash_series.explode()
+        exp_flat = self.exp_obj_idhash_series.explode()
+        self.assertTrue((obs_flat == exp_flat).all().all())
+
+
+if __name__ == "__main__":  # run the suite only when this file is executed directly
+    unittest.main()
diff --git a/scripts/unittests/utilities/test_gen_random_entity_counts.py b/scripts/unittests/utilities/test_gen_random_entity_counts.py
@@ -0,0 +1,60 @@
+import unittest
+import os
+import sys
+import numpy as np
+import pandas as pd
+import random
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_random_entity_counts import gen_random_entity_counts
+from objects.User import User
+
+exp_start_date = cons.unittest_registration_start_date
+exp_end_date = cons.unittest_registration_end_date
+exp_n_user_ids = cons.unittest_n_entities
+exp_lam = cons.data_model_poisson_params["user"]["lambda"]  # NOTE(review): not referenced again in this module
+
+random.seed(cons.unittest_seed)  # seed both RNGs before any fixture is generated
+np.random.seed(cons.unittest_seed)
+
+fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]  # each path below: text after cons.fpath_repo_dir, prefixed with '.'
+fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
+fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
+user_object = User(exp_n_user_ids, exp_start_date, exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)  # user fixture passed to gen_random_entity_counts below
+
+exp_randomentity_counts_dict = {  # hard-coded expected columns, four entries each
+    'uid': ['4264861381989413', '6374692674377254', '1751409580926382', '6720317315593519'], 
+    'n_devices': [1, 1, 3, 1], 
+    'n_cards': [1, 1, 1, 1], 
+    'n_ips': [6, 5, 6, 3], 
+    'n_transactions': [14, 41, 68, 55], 
+    'n_applications': [1, 7, 20, 3]
+    }
+
+exp_randomentity_counts_df = pd.DataFrame.from_dict(exp_randomentity_counts_dict)  # expected frame built from the dict above
+obs_random_entity_counts_df = gen_random_entity_counts(user_object)  # observed result, computed once at import time
+
+class Test_gen_random_entity_counts(unittest.TestCase):
+    """Check the observed entity-counts frame against the hard-coded expected frame."""
+
+    def setUp(self):
+        # Both fixtures are module-level objects built once when the module is imported.
+        self.exp_randomentity_counts_df, self.obs_random_entity_counts_df = exp_randomentity_counts_df, obs_random_entity_counts_df
+
+    def test_type(self):
+        exp_type = type(self.exp_randomentity_counts_df)
+        obs_type = type(self.obs_random_entity_counts_df)
+        self.assertEqual(exp_type, obs_type)
+
+    def test_shape(self):
+        exp_shape = self.exp_randomentity_counts_df.shape
+        obs_shape = self.obs_random_entity_counts_df.shape
+        self.assertEqual(exp_shape, obs_shape)
+
+    def test_columns(self):
+        # Column labels are compared as lists, so order matters.
+        exp_columns = self.exp_randomentity_counts_df.columns.to_list()
+        obs_columns = self.obs_random_entity_counts_df.columns.to_list()
+        self.assertEqual(exp_columns, obs_columns)
+
+    def test_values(self):
+        # Element-wise comparison of the underlying arrays; every cell must match.
+        cells_equal = self.exp_randomentity_counts_df.values == self.obs_random_entity_counts_df.values
+        self.assertTrue(cells_equal.all().all())
+
+if __name__ == "__main__":  # run the suite only when this file is executed directly
+    unittest.main()
diff --git a/scripts/unittests/utilities/test_gen_shared_idhashes.py b/scripts/unittests/utilities/test_gen_shared_idhashes.py
@@ -0,0 +1,41 @@
+import unittest
+import os
+import sys
+import random
+import numpy as np
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict
+from utilities.gen_shared_idhashes import gen_shared_idhashes
+
+random.seed(cons.unittest_seed)  # seed both RNGs before any fixture is generated
+np.random.seed(cons.unittest_seed)
+
+obs_prop_shared_idhashes=cons.data_model_shared_entities_dict["ip"]  # setting read from the "ip" entry of the shared-entities config
+obs_hash_cnt_dict = gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16)  # input fixture for gen_shared_idhashes
+obs_shared_idhashes = gen_shared_idhashes(idhash_cnt_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes)  # observed result under test
+exp_shared_idhashes = {}  # expected result is hard-coded as an empty dict
+
+class Test_gen_shared_idhashes(unittest.TestCase):
+    """Check the observed shared id-hash mapping against the hard-coded expected mapping."""
+
+    def setUp(self):
+        # Both fixtures are module-level objects built once when the module is imported.
+        self.exp_shared_idhashes, self.obs_shared_idhashes = exp_shared_idhashes, obs_shared_idhashes
+
+    def test_type(self):
+        exp_type = type(self.exp_shared_idhashes)
+        obs_type = type(self.obs_shared_idhashes)
+        self.assertEqual(exp_type, obs_type)
+
+    def test_len(self):
+        exp_len = len(self.exp_shared_idhashes)
+        obs_len = len(self.obs_shared_idhashes)
+        self.assertEqual(exp_len, obs_len)
+
+    def test_keys(self):
+        # Keys are compared as lists, so the check is order-sensitive.
+        exp_keys = [*self.exp_shared_idhashes.keys()]
+        obs_keys = [*self.obs_shared_idhashes.keys()]
+        self.assertEqual(exp_keys, obs_keys)
+
+    def test_values(self):
+        # Values are compared as lists, so the check is order-sensitive.
+        exp_values = [*self.exp_shared_idhashes.values()]
+        obs_values = [*self.obs_shared_idhashes.values()]
+        self.assertEqual(exp_values, obs_values)
+
+if __name__ == "__main__":  # run the suite only when this file is executed directly
+    unittest.main()
diff --git a/scripts/unittests/utilities/test_remove_duplicate_idhashes.py b/scripts/unittests/utilities/test_remove_duplicate_idhashes.py
@@ -0,0 +1,43 @@
+import unittest
+import os
+import sys
+import numpy as np
+import pandas as pd
+import random
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.remove_duplicate_idhashes import remove_duplicate_idhashes
+
+random.seed(cons.unittest_seed)  # seed both RNGs before calling the function under test
+np.random.seed(seed=cons.unittest_seed)
+
+obs_random_duplicate_idhashes_dict = {'idhashes':[['63cea7c46926aa74'], ['63cea7c46926aa74', '37725417bd51fb40'], ['b95cb80aae9fbbfe'], ['dded2b63f8242648']]}  # input: '63cea7c46926aa74' appears in both row 0 and row 1
+obs_random_duplicate_idhashes_df = pd.DataFrame.from_dict(obs_random_duplicate_idhashes_dict, orient='columns')  # input frame handed to the function under test
+obs_random_duplicate_idhashes = remove_duplicate_idhashes(user_data=obs_random_duplicate_idhashes_df, idhash_col='idhashes')  # observed output
+
+exp_random_duplicate_idhashes_dict = {'idhashes':[['63cea7c46926aa74'], ['37725417bd51fb40'], ['b95cb80aae9fbbfe'], ['dded2b63f8242648']]}  # expected: the repeated hash stays in row 0 and is dropped from row 1
+exp_random_duplicate_idhashes = pd.DataFrame.from_dict(exp_random_duplicate_idhashes_dict, orient='columns')  # expected output frame
+
+class Test_remove_duplicate_idhashes(unittest.TestCase):
+    """Check the output of remove_duplicate_idhashes against the hard-coded expected frame."""
+
+    def setUp(self):
+        # Both fixtures are module-level objects built once when the module is imported.
+        self.exp_random_duplicate_idhashes = exp_random_duplicate_idhashes
+        self.obs_random_duplicate_idhashes = obs_random_duplicate_idhashes
+
+    def test_type(self):
+        self.assertEqual(type(self.exp_random_duplicate_idhashes), type(self.obs_random_duplicate_idhashes))
+
+    def test_shape(self):
+        self.assertEqual(self.exp_random_duplicate_idhashes.shape, self.obs_random_duplicate_idhashes.shape)
+
+    def test_columns(self):
+        self.assertEqual(self.exp_random_duplicate_idhashes.columns.to_list(), self.obs_random_duplicate_idhashes.columns.to_list())
+
+    def test_values(self):
+        self.assertTrue((self.exp_random_duplicate_idhashes.values == self.obs_random_duplicate_idhashes.values).all().all())  # expected vs OBSERVED; comparing expected with itself can never fail
+
+if __name__ == "__main__":  # run the suite only when this file is executed directly
+    unittest.main()