Skip to content

Commit

Permalink
Merge pull request #11 from oislen/dev
Browse files Browse the repository at this point in the history
New Unittests for Utilities
  • Loading branch information
oislen authored Oct 5, 2024
2 parents d467b99 + 800ddd7 commit 03b5bf8
Show file tree
Hide file tree
Showing 5 changed files with 259 additions and 0 deletions.
45 changes: 45 additions & 0 deletions scripts/unittests/utilities/test_gen_country_codes_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import unittest
import os
import sys
import numpy as np

sys.path.append(os.path.join(os.getcwd(), "scripts"))

import cons
from utilities.gen_country_codes_map import gen_country_codes_map

# Seed numpy's global RNG with the shared unit-test seed.
np.random.seed(cons.unittest_seed)

# Expected numeric-code -> two-letter-code mapping. The entry order matters:
# the key and value tests below compare ordered lists.
exp_country_codes_map = {
    804: 'UA',
    250: 'FR',
    724: 'ES',
    752: 'SE',
    276: 'DE',
    246: 'FI',
    578: 'NO',
    616: 'PL',
    380: 'IT',
    826: 'GB',
    642: 'RO',
    112: 'BY',
    300: 'GR',
    100: 'BG',
    352: 'IS',
    620: 'PT',
    203: 'CZ',
    208: 'DK',
    348: 'HU',
    688: 'RS',
    40: 'AT',
    372: 'IE',
    440: 'LT',
    428: 'LV',
    191: 'HR',
    70: 'BA',
    703: 'SK',
    233: 'EE',
    528: 'NL',
    756: 'CH',
    498: 'MD',
    56: 'BE',
    8: 'AL',
    807: 'MK',
    705: 'SI',
    499: 'ME',
    196: 'CY',
    442: 'LU',
    234: 'FO',
    20: 'AD',
    470: 'MT',
    438: 'LI',
    831: 'GG',
    674: 'SM',
    292: 'GI',
    492: 'MC',
    336: 'VA',
}

# Keep only the part of the configured path that follows the repository
# directory, then prefix it with '.' to make it relative.
_countrieseurope_suffix = cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
fpath_countrieseurope = '.' + _countrieseurope_suffix

# Observed mapping produced by the function under test.
obs_country_codes_map = gen_country_codes_map(
    fpath_countrieseurope=fpath_countrieseurope,
)

class Test_gen_country_codes_dict(unittest.TestCase):
    """Check the observed country-code mapping against the expected mapping.

    Both mappings are built at module level; ``setUp`` only binds them to the
    test instance so each test reads them through ``self``.
    """

    def setUp(self):
        # Bind the module-level fixtures to the test instance.
        self.exp_country_codes_map = exp_country_codes_map
        self.obs_country_codes_map = obs_country_codes_map

    def test_type(self):
        # Expected and observed objects must be of the same type.
        expected_type = type(self.exp_country_codes_map)
        observed_type = type(self.obs_country_codes_map)
        self.assertEqual(expected_type, observed_type)

    def test_len(self):
        # Same number of entries on both sides.
        expected_size = len(self.exp_country_codes_map)
        observed_size = len(self.obs_country_codes_map)
        self.assertEqual(expected_size, observed_size)

    def test_keys(self):
        # Keys are compared as ordered lists, so insertion order is checked too.
        expected_keys = list(self.exp_country_codes_map.keys())
        observed_keys = list(self.obs_country_codes_map.keys())
        self.assertEqual(expected_keys, observed_keys)

    def test_values(self):
        # Values are compared as ordered lists, so insertion order is checked too.
        expected_values = list(self.exp_country_codes_map.values())
        observed_values = list(self.obs_country_codes_map.values())
        self.assertEqual(expected_values, observed_values)


if __name__ == "__main__":
    unittest.main()
70 changes: 70 additions & 0 deletions scripts/unittests/utilities/test_gen_obj_idhash_series.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest
import os
import sys
import random
import numpy as np
import pandas as pd

sys.path.append(os.path.join(os.getcwd(), "scripts"))

import cons

# Seed Python's and numpy's global RNGs before the project modules below are
# imported. Both RNGs are seeded again further down, immediately before the
# fixtures are built.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

from utilities.gen_random_entity_counts import gen_random_entity_counts
from utilities.gen_obj_idhash_series import gen_obj_idhash_series
from objects.User import User
from objects.Device import Device

# Unit-test parameters taken from the shared constants module.
start_date = cons.unittest_registration_start_date
end_date = cons.unittest_registration_end_date
n_user_ids = cons.unittest_n_entities
# Rebuild each data-file path relative to '.': keep the part of the configured
# path that follows cons.fpath_repo_dir and prefix it with '.'.
# NOTE(review): presumably this assumes the tests run from the repository root - confirm.
fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]
fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
fpath_smartphones = '.' + cons.fpath_smartphones.split(cons.fpath_repo_dir)[1]

# Re-seed both RNGs right before the fixtures are generated.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# create the user object from the unit-test parameters and data files
user_object = User(n_user_ids=n_user_ids, start_date=start_date, end_date=end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)
# generate random entity counts for the users
random_entity_counts = gen_random_entity_counts(user_obj=user_object)
# create the device object, sized by the total of the 'n_devices' column
device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum(), fpath_smartphones=fpath_smartphones)
# copy the entity counts, then generate the observed series from the device
# hash properties and the 'n_devices' counts
user_data = random_entity_counts.copy()
obs_obj_idhash_series = gen_obj_idhash_series(idhashes_props_dict=device_obj.device_hashes_props_dict, n_counts_series=user_data['n_devices'])
# Expected result: a Series of lists of hash strings, one list per row.
# NOTE(review): these literals presumably only hold for the seed and draw order above - confirm before changing either.
exp_obj_idhash_series = pd.Series([['2ff23757073a0735'], ['73d2fd828c1fd115'], ['2fc83030d4f37f76', '20f0fba2565dd55c', '257aa14d0bef04bc'], ['f232f0f0bbdcd452']])

class Test_gen_idhash_cnt_dict(unittest.TestCase):
    """Check the observed object id-hash series against the expected series.

    NOTE(review): the fixtures bound in ``setUp`` are named
    ``*_obj_idhash_series``, so the class name looks inherited from another
    test module; it is kept unchanged so existing test IDs stay valid.
    """

    def setUp(self):
        # Bind the module-level fixtures to the test instance.
        self.obs_obj_idhash_series = obs_obj_idhash_series
        self.exp_obj_idhash_series = exp_obj_idhash_series

    def test_type(self):
        # Observed and expected objects must be of the same type.
        observed_type = type(self.obs_obj_idhash_series)
        expected_type = type(self.exp_obj_idhash_series)
        self.assertEqual(observed_type, expected_type)

    def test_shape(self):
        # Same shape on both sides.
        observed_shape = self.obs_obj_idhash_series.shape
        expected_shape = self.exp_obj_idhash_series.shape
        self.assertEqual(observed_shape, expected_shape)

    def test_dtypes(self):
        # Same dtype on both sides.
        observed_dtype = self.obs_obj_idhash_series.dtypes
        expected_dtype = self.exp_obj_idhash_series.dtypes
        self.assertEqual(observed_dtype, expected_dtype)

    def test_isnull(self):
        # Null masks must agree element by element.
        null_masks_agree = self.obs_obj_idhash_series.isnull() == self.exp_obj_idhash_series.isnull()
        self.assertTrue(null_masks_agree.all().all())

    def test_notnull(self):
        # Non-null masks must agree element by element.
        notnull_masks_agree = self.obs_obj_idhash_series.notnull() == self.exp_obj_idhash_series.notnull()
        self.assertTrue(notnull_masks_agree.all().all())

    def test_object(self):
        # Explode the per-row lists so individual hashes are compared.
        observed_hashes = self.obs_obj_idhash_series.explode()
        expected_hashes = self.exp_obj_idhash_series.explode()
        self.assertTrue((observed_hashes == expected_hashes).all().all())


if __name__ == "__main__":
    unittest.main()
60 changes: 60 additions & 0 deletions scripts/unittests/utilities/test_gen_random_entity_counts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import unittest
import os
import sys
import numpy as np
import pandas as pd
import random

sys.path.append(os.path.join(os.getcwd(), "scripts"))

import cons
from utilities.gen_random_entity_counts import gen_random_entity_counts
from objects.User import User

# Unit-test parameters taken from the shared constants module.
exp_start_date = cons.unittest_registration_start_date
exp_end_date = cons.unittest_registration_end_date
exp_n_user_ids = cons.unittest_n_entities
# NOTE(review): exp_lam is not referenced again in the visible part of this file - confirm it is still needed.
exp_lam = cons.data_model_poisson_params["user"]["lambda"]

# Seed both RNGs before any fixture is generated.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# Rebuild each data-file path relative to '.': keep the part of the configured
# path that follows cons.fpath_repo_dir and prefix it with '.'.
fpath_firstnames = '.' + cons.fpath_firstnames.split(cons.fpath_repo_dir)[1]
fpath_lastnames = '.' + cons.fpath_lastnames.split(cons.fpath_repo_dir)[1]
fpath_countrieseurope = '.' + cons.fpath_countrieseurope.split(cons.fpath_repo_dir)[1]
fpath_domain_email = '.' + cons.fpath_domain_email.split(cons.fpath_repo_dir)[1]
# User object that is passed to the function under test.
user_object = User(exp_n_user_ids, exp_start_date, exp_end_date, fpath_firstnames=fpath_firstnames, fpath_lastnames=fpath_lastnames, fpath_countrieseurope=fpath_countrieseurope, fpath_domain_email=fpath_domain_email)

# Expected result as column -> values, one list element per row.
# NOTE(review): these literals presumably only hold for the seed and draw order above - confirm before changing either.
exp_randomentity_counts_dict = {
'uid': ['4264861381989413', '6374692674377254', '1751409580926382', '6720317315593519'],
'n_devices': [1, 1, 3, 1],
'n_cards': [1, 1, 1, 1],
'n_ips': [6, 5, 6, 3],
'n_transactions': [14, 41, 68, 55],
'n_applications': [1, 7, 20, 3]
}

# Expected frame built from the literals; observed frame from the function under test.
exp_randomentity_counts_df = pd.DataFrame.from_dict(exp_randomentity_counts_dict)
obs_random_entity_counts_df = gen_random_entity_counts(user_object)

class Test_gen_random_entity_counts(unittest.TestCase):
    """Check the observed random entity counts frame against the expected frame.

    Both frames are built at module level; ``setUp`` only binds them to the
    test instance.
    """

    def setUp(self):
        # Bind the module-level fixtures to the test instance.
        self.exp_randomentity_counts_df = exp_randomentity_counts_df
        self.obs_random_entity_counts_df = obs_random_entity_counts_df

    def test_type(self):
        # Expected and observed objects must be of the same type.
        expected_type = type(self.exp_randomentity_counts_df)
        observed_type = type(self.obs_random_entity_counts_df)
        self.assertEqual(expected_type, observed_type)

    def test_shape(self):
        # Same (rows, columns) shape on both sides.
        expected_shape = self.exp_randomentity_counts_df.shape
        observed_shape = self.obs_random_entity_counts_df.shape
        self.assertEqual(expected_shape, observed_shape)

    def test_columns(self):
        # Column labels must match, in order.
        expected_columns = self.exp_randomentity_counts_df.columns.to_list()
        observed_columns = self.obs_random_entity_counts_df.columns.to_list()
        self.assertEqual(expected_columns, observed_columns)

    def test_values(self):
        # Every cell of the underlying arrays must be equal.
        cells_match = self.exp_randomentity_counts_df.values == self.obs_random_entity_counts_df.values
        self.assertTrue(cells_match.all().all())


if __name__ == "__main__":
    unittest.main()
41 changes: 41 additions & 0 deletions scripts/unittests/utilities/test_gen_shared_idhashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import unittest
import os
import sys
import random
import numpy as np

sys.path.append(os.path.join(os.getcwd(), "scripts"))

import cons
from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict
from utilities.gen_shared_idhashes import gen_shared_idhashes

# Seed both RNGs before any fixture is generated.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# Shared-entity setting for the "ip" entry of the data-model configuration.
obs_prop_shared_idhashes=cons.data_model_shared_entities_dict["ip"]
# Id-hash count dictionary that is fed to the function under test.
obs_hash_cnt_dict = gen_idhash_cnt_dict(idhash_type="hash", n=4, lam=1, nbytes=16)
# Observed result of the function under test.
obs_shared_idhashes = gen_shared_idhashes(idhash_cnt_dict=obs_hash_cnt_dict, prop_shared_idhashes=obs_prop_shared_idhashes)
# Expected result is an empty dict.
# NOTE(review): presumably no hashes end up shared for this seed and these small inputs - confirm.
exp_shared_idhashes = {}

class Test_gen_shared_idhashes(unittest.TestCase):
    """Check the observed shared id-hash mapping against the expected mapping.

    Both mappings are built at module level; ``setUp`` only binds them to the
    test instance.
    """

    def setUp(self):
        # Bind the module-level fixtures to the test instance.
        self.exp_shared_idhashes = exp_shared_idhashes
        self.obs_shared_idhashes = obs_shared_idhashes

    def test_type(self):
        # Expected and observed objects must be of the same type.
        expected_type = type(self.exp_shared_idhashes)
        observed_type = type(self.obs_shared_idhashes)
        self.assertEqual(expected_type, observed_type)

    def test_len(self):
        # Same number of entries on both sides.
        expected_size = len(self.exp_shared_idhashes)
        observed_size = len(self.obs_shared_idhashes)
        self.assertEqual(expected_size, observed_size)

    def test_keys(self):
        # Keys are compared as ordered lists.
        expected_keys = list(self.exp_shared_idhashes.keys())
        observed_keys = list(self.obs_shared_idhashes.keys())
        self.assertEqual(expected_keys, observed_keys)

    def test_values(self):
        # Values are compared as ordered lists.
        expected_values = list(self.exp_shared_idhashes.values())
        observed_values = list(self.obs_shared_idhashes.values())
        self.assertEqual(expected_values, observed_values)


if __name__ == "__main__":
    unittest.main()
43 changes: 43 additions & 0 deletions scripts/unittests/utilities/test_remove_duplicate_idhashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import unittest
import os
import sys
import numpy as np
import pandas as pd
import random

sys.path.append(os.path.join(os.getcwd(), "scripts"))

import cons
from utilities.remove_duplicate_idhashes import remove_duplicate_idhashes

# Seed both RNGs with the shared unit-test seed.
random.seed(cons.unittest_seed)
np.random.seed(seed=cons.unittest_seed)

# Input frame: the second row repeats the hash that already appears in the
# first row.
obs_random_duplicate_idhashes_dict = {
    'idhashes': [
        ['63cea7c46926aa74'],
        ['63cea7c46926aa74', '37725417bd51fb40'],
        ['b95cb80aae9fbbfe'],
        ['dded2b63f8242648'],
    ]
}
obs_random_duplicate_idhashes_df = pd.DataFrame.from_dict(
    obs_random_duplicate_idhashes_dict,
    orient='columns',
)
# Observed result of the function under test.
obs_random_duplicate_idhashes = remove_duplicate_idhashes(
    user_data=obs_random_duplicate_idhashes_df,
    idhash_col='idhashes',
)

# Expected frame: the repeated hash is gone from the second row.
exp_random_duplicate_idhashes_dict = {
    'idhashes': [
        ['63cea7c46926aa74'],
        ['37725417bd51fb40'],
        ['b95cb80aae9fbbfe'],
        ['dded2b63f8242648'],
    ]
}
exp_random_duplicate_idhashes = pd.DataFrame.from_dict(
    exp_random_duplicate_idhashes_dict,
    orient='columns',
)

class Test_remove_duplicate_idhashes(unittest.TestCase):
    """Check the de-duplicated id-hash frame against the expected frame.

    Both frames are built at module level; ``setUp`` only binds them to the
    test instance.
    """

    def setUp(self):
        # Bind the module-level fixtures to the test instance.
        self.exp_random_duplicate_idhashes = exp_random_duplicate_idhashes
        self.obs_random_duplicate_idhashes = obs_random_duplicate_idhashes

    def test_type(self):
        # Expected and observed objects must be of the same type.
        self.assertEqual(type(self.exp_random_duplicate_idhashes), type(self.obs_random_duplicate_idhashes))

    def test_shape(self):
        # Same (rows, columns) shape on both sides.
        self.assertEqual(self.exp_random_duplicate_idhashes.shape, self.obs_random_duplicate_idhashes.shape)

    def test_columns(self):
        # Column labels must match, in order.
        self.assertEqual(self.exp_random_duplicate_idhashes.columns.to_list(), self.obs_random_duplicate_idhashes.columns.to_list())

    def test_values(self):
        # Compare expected against OBSERVED. The previous version compared the
        # expected frame with itself, so it passed whatever the function returned.
        # np.array_equal returns False on a shape mismatch, so a wrong shape is
        # reported as a test failure rather than an error.
        expected_values = self.exp_random_duplicate_idhashes.values
        observed_values = self.obs_random_duplicate_idhashes.values
        self.assertTrue(np.array_equal(expected_values, observed_values))


if __name__ == "__main__":
    unittest.main()

0 comments on commit 03b5bf8

Please sign in to comment.