From 03c64b5d9cd290cc4102df7f0c1f63c5ec6b51ae Mon Sep 17 00:00:00 2001
From: Oisin
Date: Fri, 4 Oct 2024 16:59:37 +0100
Subject: [PATCH 1/5] Added unittest for gen_random_entity_counts

---
 .../test_gen_random_entity_counts.py | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 scripts/unittests/utilities/test_gen_random_entity_counts.py

diff --git a/scripts/unittests/utilities/test_gen_random_entity_counts.py b/scripts/unittests/utilities/test_gen_random_entity_counts.py
new file mode 100644
index 0000000..ebf9135
--- /dev/null
+++ b/scripts/unittests/utilities/test_gen_random_entity_counts.py
@@ -0,0 +1,60 @@
+import unittest
+import os
+import sys
+import numpy as np
+import pandas as pd
+import random
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_random_entity_counts import gen_random_entity_counts
+from objects.User import User
+
+exp_start_date = cons.unittest_registration_start_date
+exp_end_date = cons.unittest_registration_end_date
+exp_n_user_ids = cons.unittest_n_entities
+exp_lam = cons.data_model_poisson_params["user"]["lambda"]
+
+random.seed(cons.unittest_seed)
+np.random.seed(cons.unittest_seed)
+
+fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]
+fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
+fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
+user_object = User(exp_n_user_ids, exp_start_date, exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)
+
+exp_randomentity_counts_dict = {
+    'uid': ['4264861381989413', '6374692674377254', '1751409580926382', '6720317315593519'],
+    'n_devices': [1, 1, 3, 1],
+    'n_cards': [1, 1, 1, 1],
+    'n_ips': [6, 5, 6, 3],
+    'n_transactions': [14, 41, 68, 55],
+    'n_applications': [1, 7, 20, 3]
+    }
+
+exp_randomentity_counts_df = pd.DataFrame.from_dict(exp_randomentity_counts_dict)
+obs_random_entity_counts_df = gen_random_entity_counts(user_object)
+
+class Test_gen_random_entity_counts(unittest.TestCase):
+    """Compare the gen_random_entity_counts output for the seeded User object with the expected DataFrame fixture."""
+
+    def setUp(self):
+        self.exp_randomentity_counts_df = exp_randomentity_counts_df
+        self.obs_random_entity_counts_df = obs_random_entity_counts_df
+
+    def test_type(self):
+        self.assertEqual(type(self.exp_randomentity_counts_df), type(self.obs_random_entity_counts_df))
+
+    def test_shape(self):
+        self.assertEqual(self.exp_randomentity_counts_df.shape, self.obs_random_entity_counts_df.shape)
+
+    def test_columns(self):
+        self.assertEqual(self.exp_randomentity_counts_df.columns.to_list(), self.obs_random_entity_counts_df.columns.to_list())
+
+    def test_values(self):
+        self.assertTrue((self.exp_randomentity_counts_df.values == self.obs_random_entity_counts_df.values).all().all())
+
+if __name__ == "__main__":
+    unittest.main()

From 8921f2a18c73ab8947a7962cf8357edd026d081a Mon Sep 17 00:00:00 2001
From: Oisin
Date: Sat, 5 Oct 2024 00:04:17 +0100
Subject: [PATCH 2/5] Created unittests for remove duplicate idhashes

---
 .../test_remove_duplicate_idhashes.py | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 scripts/unittests/utilities/test_remove_duplicate_idhashes.py

diff --git a/scripts/unittests/utilities/test_remove_duplicate_idhashes.py b/scripts/unittests/utilities/test_remove_duplicate_idhashes.py
new file mode 100644
index 0000000..5e5be67
--- /dev/null
+++ b/scripts/unittests/utilities/test_remove_duplicate_idhashes.py
@@ -0,0 +1,43 @@
+import unittest
+import os
+import sys
+import numpy as np
+import pandas as pd
+import random
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.remove_duplicate_idhashes import remove_duplicate_idhashes
+
+random.seed(cons.unittest_seed)
+np.random.seed(seed=cons.unittest_seed)
+
+obs_random_duplicate_idhashes_dict = {'idhashes':[['63cea7c46926aa74'], ['63cea7c46926aa74', '37725417bd51fb40'], ['b95cb80aae9fbbfe'], ['dded2b63f8242648']]}
+obs_random_duplicate_idhashes_df = pd.DataFrame.from_dict(obs_random_duplicate_idhashes_dict, orient='columns')
+obs_random_duplicate_idhashes = remove_duplicate_idhashes(user_data=obs_random_duplicate_idhashes_df, idhash_col='idhashes')
+
+exp_random_duplicate_idhashes_dict = {'idhashes':[['63cea7c46926aa74'], ['37725417bd51fb40'], ['b95cb80aae9fbbfe'], ['dded2b63f8242648']]}
+exp_random_duplicate_idhashes = pd.DataFrame.from_dict(exp_random_duplicate_idhashes_dict, orient='columns')
+
+class Test_remove_duplicate_idhashes(unittest.TestCase):
+    """Compare the remove_duplicate_idhashes output for a frame with a repeated idhash with the expected de-duplicated frame."""
+
+    def setUp(self):
+        self.exp_random_duplicate_idhashes = exp_random_duplicate_idhashes
+        self.obs_random_duplicate_idhashes = obs_random_duplicate_idhashes
+
+    def test_type(self):
+        self.assertEqual(type(self.exp_random_duplicate_idhashes), type(self.obs_random_duplicate_idhashes))
+
+    def test_shape(self):
+        self.assertEqual(self.exp_random_duplicate_idhashes.shape, self.obs_random_duplicate_idhashes.shape)
+
+    def test_columns(self):
+        self.assertEqual(self.exp_random_duplicate_idhashes.columns.to_list(), self.obs_random_duplicate_idhashes.columns.to_list())
+
+    def test_values(self):
+        self.assertTrue((self.exp_random_duplicate_idhashes.values == self.obs_random_duplicate_idhashes.values).all().all())
+
+if __name__ == "__main__":
+    unittest.main()

From 937a0af7eef3f4c19490338b24ac23e5d2291552 Mon Sep 17 00:00:00 2001
From: Oisin
Date: Sat, 5 Oct 2024 17:14:03 +0100
Subject: [PATCH 3/5] Added unittests for gen_shared_idhashes

---
 .../utilities/test_gen_shared_idhashes.py | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 scripts/unittests/utilities/test_gen_shared_idhashes.py

diff --git a/scripts/unittests/utilities/test_gen_shared_idhashes.py b/scripts/unittests/utilities/test_gen_shared_idhashes.py
new file mode 100644
index 0000000..08239ae
--- /dev/null
+++ b/scripts/unittests/utilities/test_gen_shared_idhashes.py
@@ -0,0 +1,41 @@
+import unittest
+import os
+import sys
+import random
+import numpy as np
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict
+from utilities.gen_shared_idhashes import gen_shared_idhashes
+
+random.seed(cons.unittest_seed)
+np.random.seed(cons.unittest_seed)
+
+obs_prop_shared_idhashes=cons.data_model_shared_entities_dict["ip"]
+obs_hash_cnt_dict = gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16)
+obs_shared_idhashes = gen_shared_idhashes(idhash_cnt_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes)
+exp_shared_idhashes = {}
+
+class Test_gen_shared_idhashes(unittest.TestCase):
+    """Compare the gen_shared_idhashes output for the seeded idhash counts with the expected (empty) dictionary."""
+
+    def setUp(self):
+        self.exp_shared_idhashes = exp_shared_idhashes
+        self.obs_shared_idhashes = obs_shared_idhashes
+
+    def test_type(self):
+        self.assertEqual(type(self.exp_shared_idhashes), type(self.obs_shared_idhashes))
+
+    def test_len(self):
+        self.assertEqual(len(self.exp_shared_idhashes), len(self.obs_shared_idhashes))
+
+    def test_keys(self):
+        self.assertEqual(list(self.exp_shared_idhashes.keys()), list(self.obs_shared_idhashes.keys()))
+
+    def test_values(self):
+        self.assertEqual(list(self.exp_shared_idhashes.values()), list(self.obs_shared_idhashes.values()))
+
+if __name__ == "__main__":
+    unittest.main()

From a66a97ad3a1367328f86cc85f48190b030b5384d Mon Sep 17 00:00:00 2001
From: Oisin
Date: Sat, 5 Oct 2024 17:22:47 +0100
Subject: [PATCH 4/5] Added unittests for gen_country_codes_map

---
 .../utilities/test_gen_country_codes_map.py | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 scripts/unittests/utilities/test_gen_country_codes_map.py

diff --git a/scripts/unittests/utilities/test_gen_country_codes_map.py b/scripts/unittests/utilities/test_gen_country_codes_map.py
new file mode 100644
index 0000000..a5f145c
--- /dev/null
+++ b/scripts/unittests/utilities/test_gen_country_codes_map.py
@@ -0,0 +1,45 @@
+import unittest
+import os
+import sys
+import numpy as np
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+from utilities.gen_country_codes_map import gen_country_codes_map
+
+np.random.seed(cons.unittest_seed)
+
+exp_country_codes_map = {
+    804: 'UA', 250: 'FR', 724: 'ES', 752: 'SE', 276: 'DE', 246: 'FI', 578: 'NO', 616: 'PL', 380: 'IT', 826: 'GB', 642: 'RO',
+    112: 'BY', 300: 'GR', 100: 'BG', 352: 'IS', 620: 'PT', 203: 'CZ', 208: 'DK', 348: 'HU', 688: 'RS', 40: 'AT', 372: 'IE',
+    440: 'LT', 428: 'LV', 191: 'HR', 70: 'BA', 703: 'SK', 233: 'EE', 528: 'NL', 756: 'CH', 498: 'MD', 56: 'BE', 8: 'AL',
+    807: 'MK', 705: 'SI', 499: 'ME', 196: 'CY', 442: 'LU', 234: 'FO', 20: 'AD', 470: 'MT', 438: 'LI', 831: 'GG', 674: 'SM',
+    292: 'GI', 492: 'MC', 336: 'VA'
+    }
+
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
+obs_country_codes_map = gen_country_codes_map(fpath_countrieseurope=fpath_countrieseurope)
+
+class Test_gen_country_codes_dict(unittest.TestCase):
+    """Compare the gen_country_codes_map output with the expected numeric-code to alpha-2 mapping (class name says 'dict')."""
+
+    def setUp(self):
+        self.exp_country_codes_map = exp_country_codes_map
+        self.obs_country_codes_map = obs_country_codes_map
+
+    def test_type(self):
+        self.assertEqual(type(self.exp_country_codes_map), type(self.obs_country_codes_map))
+
+    def test_len(self):
+        self.assertEqual(len(self.exp_country_codes_map), len(self.obs_country_codes_map))
+
+    def test_keys(self):
+        self.assertEqual(list(self.exp_country_codes_map.keys()), list(self.obs_country_codes_map.keys()))
+
+    def test_values(self):
+        self.assertEqual(list(self.exp_country_codes_map.values()), list(self.obs_country_codes_map.values()))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 800ddd7dfdaa19a7a345d988088216b682243b56 Mon Sep 17 00:00:00 2001
From: Oisin
Date: Sat, 5 Oct 2024 18:12:43 +0100
Subject: [PATCH 5/5] Added unittests for gen_obj_idhash_series

---
 .../utilities/test_gen_obj_idhash_series.py | 70 +++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 scripts/unittests/utilities/test_gen_obj_idhash_series.py

diff --git a/scripts/unittests/utilities/test_gen_obj_idhash_series.py b/scripts/unittests/utilities/test_gen_obj_idhash_series.py
new file mode 100644
index 0000000..7817d8f
--- /dev/null
+++ b/scripts/unittests/utilities/test_gen_obj_idhash_series.py
@@ -0,0 +1,70 @@
+import unittest
+import os
+import sys
+import random
+import numpy as np
+import pandas as pd
+
+sys.path.append(os.path.join(os.getcwd(), "scripts"))
+
+import cons
+
+random.seed(cons.unittest_seed)
+np.random.seed(cons.unittest_seed)
+
+from utilities.gen_random_entity_counts import gen_random_entity_counts
+from utilities.gen_obj_idhash_series import gen_obj_idhash_series
+from objects.User import User
+from objects.Device import Device
+
+start_date = cons.unittest_registration_start_date
+end_date = cons.unittest_registration_end_date
+n_user_ids = cons.unittest_n_entities
+fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]
+fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
+fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
+fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
+fpath_smartphones = '.' + cons.fpath_smartphones.split(cons.fpath_repo_dir)[1]
+
+random.seed(cons.unittest_seed)
+np.random.seed(cons.unittest_seed)
+
+# create user object
+user_object = User(n_user_ids=n_user_ids, start_date=start_date, end_date=end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)
+# generate random entity counts
+random_entity_counts = gen_random_entity_counts(user_obj=user_object)
+# generate random entity values
+device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum(), fpath_smartphones=fpath_smartphones)
+# generate user data and device hashes
+user_data = random_entity_counts.copy()
+obs_obj_idhash_series = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices'])
+exp_obj_idhash_series = pd.Series([['2ff23757073a0735'], ['73d2fd828c1fd115'], ['2fc83030d4f37f76', '20f0fba2565dd55c', '257aa14d0bef04bc'], ['f232f0f0bbdcd452']])
+
+class Test_gen_idhash_cnt_dict(unittest.TestCase):
+    """Compare the gen_obj_idhash_series output for device hashes with the expected Series (class name says 'gen_idhash_cnt_dict')."""
+
+    def setUp(self):
+        self.obs_obj_idhash_series = obs_obj_idhash_series
+        self.exp_obj_idhash_series = exp_obj_idhash_series
+
+    def test_type(self):
+        self.assertEqual(type(self.obs_obj_idhash_series), type(self.exp_obj_idhash_series))
+
+    def test_shape(self):
+        self.assertEqual(self.obs_obj_idhash_series.shape, self.exp_obj_idhash_series.shape)
+
+    def test_dtypes(self):
+        self.assertEqual(self.obs_obj_idhash_series.dtypes, self.exp_obj_idhash_series.dtypes)
+
+    def test_isnull(self):
+        self.assertTrue((self.obs_obj_idhash_series.isnull() == self.exp_obj_idhash_series.isnull()).all().all())
+
+    def test_notnull(self):
+        self.assertTrue((self.obs_obj_idhash_series.notnull() == self.exp_obj_idhash_series.notnull()).all().all())
+
+    def test_object(self):
+        self.assertTrue((self.obs_obj_idhash_series.explode() == self.exp_obj_idhash_series.explode()).all().all())
+
+
+if __name__ == "__main__":
+    unittest.main()