-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11 from oislen/dev
New Unittests for Utilities
- Loading branch information
Showing 5 changed files with 259 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import unittest | ||
import os | ||
import sys | ||
import numpy as np | ||
|
||
sys.path.append(os.path.join(os.getcwd(), "scripts")) | ||
|
||
import cons | ||
from utilities.gen_country_codes_map import gen_country_codes_map | ||
|
||
# Seed NumPy's global generator with the shared unit-test seed.
np.random.seed(cons.unittest_seed)

# Expected result: integer codes mapped to two-letter strings.
# NOTE(review): presumably ISO 3166-1 numeric -> alpha-2 codes; confirm.
# Insertion order is significant because the key/value tests compare
# ordered lists.
exp_country_codes_map = {
    804: 'UA', 250: 'FR', 724: 'ES', 752: 'SE', 276: 'DE', 246: 'FI', 578: 'NO', 616: 'PL',
    380: 'IT', 826: 'GB', 642: 'RO', 112: 'BY', 300: 'GR', 100: 'BG', 352: 'IS', 620: 'PT',
    203: 'CZ', 208: 'DK', 348: 'HU', 688: 'RS', 40: 'AT', 372: 'IE', 440: 'LT', 428: 'LV',
    191: 'HR', 70: 'BA', 703: 'SK', 233: 'EE', 528: 'NL', 756: 'CH', 498: 'MD', 56: 'BE',
    8: 'AL', 807: 'MK', 705: 'SI', 499: 'ME', 196: 'CY', 442: 'LU', 234: 'FO', 20: 'AD',
    470: 'MT', 438: 'LI', 831: 'GG', 674: 'SM', 292: 'GI', 492: 'MC', 336: 'VA',
}

# Rebuild the configured path as "." followed by whatever comes after the
# repository directory prefix.
_countries_path_parts = cons.fpath_countrieseurope.split(cons.fpath_repo_dir)
fpath_countrieseurope = '.' + _countries_path_parts[1]

# Observed result, computed once at import time and shared by every test.
obs_country_codes_map = gen_country_codes_map(
    fpath_countrieseurope=fpath_countrieseurope,
)
|
||
class Test_gen_country_codes_dict(unittest.TestCase):
    """Compare the observed country-codes map against the expected one.

    Both maps are module-level fixtures built once at import time;
    ``setUp`` only binds them to the test instance.
    """

    def setUp(self):
        # Nothing is recomputed per test; just attach the shared fixtures.
        self.obs_country_codes_map = obs_country_codes_map
        self.exp_country_codes_map = exp_country_codes_map

    def test_type(self):
        # Both maps must be instances of exactly the same type.
        expected_type = type(self.exp_country_codes_map)
        observed_type = type(self.obs_country_codes_map)
        self.assertEqual(expected_type, observed_type)

    def test_len(self):
        # Same number of entries on both sides.
        expected_size = len(self.exp_country_codes_map)
        observed_size = len(self.obs_country_codes_map)
        self.assertEqual(expected_size, observed_size)

    def test_keys(self):
        # Keys are compared as ordered lists, so ordering matters too.
        expected_keys = [*self.exp_country_codes_map.keys()]
        observed_keys = [*self.obs_country_codes_map.keys()]
        self.assertEqual(expected_keys, observed_keys)

    def test_values(self):
        # Values are compared as ordered lists, so ordering matters too.
        expected_values = [*self.exp_country_codes_map.values()]
        observed_values = [*self.obs_country_codes_map.values()]
        self.assertEqual(expected_values, observed_values)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import unittest | ||
import os | ||
import sys | ||
import random | ||
import numpy as np | ||
import pandas as pd | ||
|
||
sys.path.append(os.path.join(os.getcwd(), "scripts")) | ||
|
||
import cons | ||
|
||
random.seed(cons.unittest_seed) | ||
np.random.seed(cons.unittest_seed) | ||
|
||
from utilities.gen_random_entity_counts import gen_random_entity_counts | ||
from utilities.gen_obj_idhash_series import gen_obj_idhash_series | ||
from objects.User import User | ||
from objects.Device import Device | ||
|
||
def _repo_relative(abs_fpath):
    """Return "." plus the part of *abs_fpath* after the repo directory."""
    return '.' + abs_fpath.split(cons.fpath_repo_dir)[1]


# Registration window and entity count taken from the shared constants.
start_date = cons.unittest_registration_start_date
end_date = cons.unittest_registration_end_date
n_user_ids = cons.unittest_n_entities

# Reference data files, rewritten as "./<path below the repo directory>".
fpath_firstnames = _repo_relative(cons.fpath_firstnames)
fpath_lastnames = _repo_relative(cons.fpath_lastnames)
fpath_countrieseurope = _repo_relative(cons.fpath_countrieseurope)
fpath_domain_email = _repo_relative(cons.fpath_domain_email)
fpath_smartphones = _repo_relative(cons.fpath_smartphones)

# Reseed both generators immediately before any object is constructed.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# Build the user object the entity counts are derived from.
user_object = User(
    n_user_ids=n_user_ids,
    start_date=start_date,
    end_date=end_date,
    fpath_firstnames=fpath_firstnames,
    fpath_lastnames=fpath_lastnames,
    fpath_countrieseurope=fpath_countrieseurope,
    fpath_domain_email=fpath_domain_email,
)

# Per-user entity counts.
random_entity_counts = gen_random_entity_counts(user_obj=user_object)

# One device object sized to the total number of devices across all users.
_total_devices = random_entity_counts['n_devices'].sum()
device_obj = Device(n_device_hashes=_total_devices, fpath_smartphones=fpath_smartphones)

# Work on a copy of the counts and derive the observed id-hash series.
user_data = random_entity_counts.copy()
obs_obj_idhash_series = gen_obj_idhash_series(
    idhashes_props_dict=device_obj.device_hashes_props_dict,
    n_counts_series=user_data['n_devices'],
)

# Expected series: one list of hashes per row.
exp_obj_idhash_series = pd.Series(
    [
        ['2ff23757073a0735'],
        ['73d2fd828c1fd115'],
        ['2fc83030d4f37f76', '20f0fba2565dd55c', '257aa14d0bef04bc'],
        ['f232f0f0bbdcd452'],
    ]
)
|
||
class Test_gen_idhash_cnt_dict(unittest.TestCase):
    """Compare the observed id-hash series against the expected series.

    The two series are module-level fixtures; ``setUp`` binds them to the
    test instance without recomputing anything.
    """

    # NOTE(review): the class name mentions ``gen_idhash_cnt_dict`` while the
    # observed fixture is produced by ``gen_obj_idhash_series``; the name is
    # kept as-is so test discovery and any references to it stay unchanged.

    def setUp(self):
        self.exp_obj_idhash_series = exp_obj_idhash_series
        self.obs_obj_idhash_series = obs_obj_idhash_series

    def test_type(self):
        # Both objects must be instances of exactly the same type.
        observed_type = type(self.obs_obj_idhash_series)
        expected_type = type(self.exp_obj_idhash_series)
        self.assertEqual(observed_type, expected_type)

    def test_shape(self):
        observed_shape = self.obs_obj_idhash_series.shape
        expected_shape = self.exp_obj_idhash_series.shape
        self.assertEqual(observed_shape, expected_shape)

    def test_dtypes(self):
        observed_dtype = self.obs_obj_idhash_series.dtypes
        expected_dtype = self.exp_obj_idhash_series.dtypes
        self.assertEqual(observed_dtype, expected_dtype)

    def test_isnull(self):
        # Null positions must coincide element by element.
        null_masks_agree = self.obs_obj_idhash_series.isnull() == self.exp_obj_idhash_series.isnull()
        self.assertTrue(null_masks_agree.all().all())

    def test_notnull(self):
        # Non-null positions must coincide element by element.
        notnull_masks_agree = self.obs_obj_idhash_series.notnull() == self.exp_obj_idhash_series.notnull()
        self.assertTrue(notnull_masks_agree.all().all())

    def test_object(self):
        # Flatten the per-row lists so individual hashes are compared.
        observed_flat = self.obs_obj_idhash_series.explode()
        expected_flat = self.exp_obj_idhash_series.explode()
        self.assertTrue((observed_flat == expected_flat).all().all())
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
60 changes: 60 additions & 0 deletions
60
scripts/unittests/utilities/test_gen_random_entity_counts.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import unittest | ||
import os | ||
import sys | ||
import numpy as np | ||
import pandas as pd | ||
import random | ||
|
||
sys.path.append(os.path.join(os.getcwd(), "scripts")) | ||
|
||
import cons | ||
from utilities.gen_random_entity_counts import gen_random_entity_counts | ||
from objects.User import User | ||
|
||
def _repo_relative(abs_fpath):
    """Return "." plus the part of *abs_fpath* after the repo directory."""
    return '.' + abs_fpath.split(cons.fpath_repo_dir)[1]


# Inputs taken from the shared constants module.
exp_start_date = cons.unittest_registration_start_date
exp_end_date = cons.unittest_registration_end_date
exp_n_user_ids = cons.unittest_n_entities
# NOTE(review): exp_lam is assigned but not used anywhere in this script.
exp_lam = cons.data_model_poisson_params["user"]["lambda"]

# Seed both generators before any object is constructed.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# Reference data files, rewritten as "./<path below the repo directory>".
fpath_firstnames = _repo_relative(cons.fpath_firstnames)
fpath_lastnames = _repo_relative(cons.fpath_lastnames)
fpath_countrieseurope = _repo_relative(cons.fpath_countrieseurope)
fpath_domain_email = _repo_relative(cons.fpath_domain_email)

# The first three arguments are passed positionally, as in the original call.
user_object = User(
    exp_n_user_ids,
    exp_start_date,
    exp_end_date,
    fpath_firstnames=fpath_firstnames,
    fpath_lastnames=fpath_lastnames,
    fpath_countrieseurope=fpath_countrieseurope,
    fpath_domain_email=fpath_domain_email,
)

# Expected per-user counts, one list entry per user id.
exp_randomentity_counts_dict = {
    'uid': [
        '4264861381989413',
        '6374692674377254',
        '1751409580926382',
        '6720317315593519',
    ],
    'n_devices': [1, 1, 3, 1],
    'n_cards': [1, 1, 1, 1],
    'n_ips': [6, 5, 6, 3],
    'n_transactions': [14, 41, 68, 55],
    'n_applications': [1, 7, 20, 3],
}

exp_randomentity_counts_df = pd.DataFrame.from_dict(exp_randomentity_counts_dict)

# Observed counts, computed once at import time.
obs_random_entity_counts_df = gen_random_entity_counts(user_object)
|
||
class Test_gen_random_entity_counts(unittest.TestCase):
    """Compare the observed entity-counts frame against the expected frame.

    Both frames are module-level fixtures; ``setUp`` binds them to the
    test instance without recomputing anything.
    """

    def setUp(self):
        self.obs_random_entity_counts_df = obs_random_entity_counts_df
        self.exp_randomentity_counts_df = exp_randomentity_counts_df

    def test_type(self):
        # Both objects must be instances of exactly the same type.
        expected_type = type(self.exp_randomentity_counts_df)
        observed_type = type(self.obs_random_entity_counts_df)
        self.assertEqual(expected_type, observed_type)

    def test_shape(self):
        expected_shape = self.exp_randomentity_counts_df.shape
        observed_shape = self.obs_random_entity_counts_df.shape
        self.assertEqual(expected_shape, observed_shape)

    def test_columns(self):
        # Column labels are compared as ordered lists.
        expected_columns = self.exp_randomentity_counts_df.columns.to_list()
        observed_columns = self.obs_random_entity_counts_df.columns.to_list()
        self.assertEqual(expected_columns, observed_columns)

    def test_values(self):
        # Cell-by-cell comparison of the underlying arrays.
        cells_match = self.exp_randomentity_counts_df.values == self.obs_random_entity_counts_df.values
        self.assertTrue(cells_match.all().all())
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import unittest | ||
import os | ||
import sys | ||
import random | ||
import numpy as np | ||
|
||
sys.path.append(os.path.join(os.getcwd(), "scripts")) | ||
|
||
import cons | ||
from utilities.gen_idhash_cnt_dict import gen_idhash_cnt_dict | ||
from utilities.gen_shared_idhashes import gen_shared_idhashes | ||
|
||
# Seed both generators before any hashes are generated.
random.seed(cons.unittest_seed)
np.random.seed(cons.unittest_seed)

# Sharing setting configured under the "ip" key.
obs_prop_shared_idhashes = cons.data_model_shared_entities_dict["ip"]

# Id-hash/count dictionary used as input for the function under test.
obs_hash_cnt_dict = gen_idhash_cnt_dict(
    idhash_type="hash",
    n=4,
    lam=1,
    nbytes=16,
)

# Observed result, computed once at import time.
obs_shared_idhashes = gen_shared_idhashes(
    idhash_cnt_dict=obs_hash_cnt_dict,
    prop_shared_idhashes=obs_prop_shared_idhashes,
)

# The expected result for these inputs is an empty dictionary.
exp_shared_idhashes = dict()
|
||
class Test_gen_shared_idhashes(unittest.TestCase):
    """Compare the observed shared id-hashes against the expected mapping.

    Both mappings are module-level fixtures; ``setUp`` binds them to the
    test instance without recomputing anything.
    """

    def setUp(self):
        self.obs_shared_idhashes = obs_shared_idhashes
        self.exp_shared_idhashes = exp_shared_idhashes

    def test_type(self):
        # Both objects must be instances of exactly the same type.
        expected_type = type(self.exp_shared_idhashes)
        observed_type = type(self.obs_shared_idhashes)
        self.assertEqual(expected_type, observed_type)

    def test_len(self):
        expected_size = len(self.exp_shared_idhashes)
        observed_size = len(self.obs_shared_idhashes)
        self.assertEqual(expected_size, observed_size)

    def test_keys(self):
        # Keys are compared as ordered lists.
        expected_keys = [*self.exp_shared_idhashes.keys()]
        observed_keys = [*self.obs_shared_idhashes.keys()]
        self.assertEqual(expected_keys, observed_keys)

    def test_values(self):
        # Values are compared as ordered lists.
        expected_values = [*self.exp_shared_idhashes.values()]
        observed_values = [*self.obs_shared_idhashes.values()]
        self.assertEqual(expected_values, observed_values)
|
||
if __name__ == "__main__": | ||
unittest.main() |
43 changes: 43 additions & 0 deletions
43
scripts/unittests/utilities/test_remove_duplicate_idhashes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import unittest | ||
import os | ||
import sys | ||
import numpy as np | ||
import pandas as pd | ||
import random | ||
|
||
sys.path.append(os.path.join(os.getcwd(), "scripts")) | ||
|
||
import cons | ||
from utilities.remove_duplicate_idhashes import remove_duplicate_idhashes | ||
|
||
# Seed both generators with the shared unit-test seed.
random.seed(cons.unittest_seed)
np.random.seed(seed=cons.unittest_seed)

# Input rows: the second row repeats the hash that already appears in the first.
obs_random_duplicate_idhashes_dict = {
    'idhashes': [
        ['63cea7c46926aa74'],
        ['63cea7c46926aa74', '37725417bd51fb40'],
        ['b95cb80aae9fbbfe'],
        ['dded2b63f8242648'],
    ],
}
obs_random_duplicate_idhashes_df = pd.DataFrame.from_dict(
    obs_random_duplicate_idhashes_dict,
    orient='columns',
)

# Observed result, computed once at import time.
obs_random_duplicate_idhashes = remove_duplicate_idhashes(
    user_data=obs_random_duplicate_idhashes_df,
    idhash_col='idhashes',
)

# Expected rows: the repeated hash is absent from the second row.
exp_random_duplicate_idhashes_dict = {
    'idhashes': [
        ['63cea7c46926aa74'],
        ['37725417bd51fb40'],
        ['b95cb80aae9fbbfe'],
        ['dded2b63f8242648'],
    ],
}
exp_random_duplicate_idhashes = pd.DataFrame.from_dict(
    exp_random_duplicate_idhashes_dict,
    orient='columns',
)
|
||
class Test_remove_duplicate_idhashes(unittest.TestCase):
    """Compare the observed de-duplicated frame against the expected frame.

    Both frames are module-level fixtures; ``setUp`` binds them to the
    test instance without recomputing anything.
    """

    def setUp(self):
        self.exp_random_duplicate_idhashes = exp_random_duplicate_idhashes
        self.obs_random_duplicate_idhashes = obs_random_duplicate_idhashes

    def test_type(self):
        # Both objects must be instances of exactly the same type.
        self.assertEqual(type(self.exp_random_duplicate_idhashes), type(self.obs_random_duplicate_idhashes))

    def test_shape(self):
        self.assertEqual(self.exp_random_duplicate_idhashes.shape, self.obs_random_duplicate_idhashes.shape)

    def test_columns(self):
        # Column labels are compared as ordered lists.
        self.assertEqual(self.exp_random_duplicate_idhashes.columns.to_list(), self.obs_random_duplicate_idhashes.columns.to_list())

    def test_values(self):
        # Compare expected against OBSERVED values. The previous version
        # compared the expected frame with itself, so the assertion was
        # always true and could never detect a regression.
        self.assertTrue((self.exp_random_duplicate_idhashes.values == self.obs_random_duplicate_idhashes.values).all().all())
|
||
if __name__ == "__main__": | ||
unittest.main() |