diff --git a/scripts/app/gen_trans_data.py b/scripts/app/gen_trans_data.py index 2b7aad9..81afd65 100644 --- a/scripts/app/gen_trans_data.py +++ b/scripts/app/gen_trans_data.py @@ -4,7 +4,6 @@ import cons from utilities.gen_country_codes_map import gen_country_codes_map from utilities.align_country_codes import align_country_codes -from utilities.gen_trans_error_codes import gen_trans_error_codes from utilities.gen_trans_rejection_rates import gen_trans_rejection_rates from utilities.gen_trans_status import gen_trans_status @@ -88,14 +87,10 @@ def gen_trans_data(user_data, device_obj, card_obj, ip_obj, transaction_obj, app trans_data['card_country_code'] = trans_data['card_country_code'].replace(country_codes_map) trans_data['ip_country_code'] = trans_data['ip_country_code'].replace(country_codes_map) - # generate transaction status - trans_data['transaction_status'] = trans_data['transaction_hash'].replace(transaction_obj.transaction_hashes_status_dict) + # generate transaction status and error code rejection_rates_dict = gen_trans_rejection_rates(trans_data = trans_data) - trans_data['transaction_status'] = trans_data.apply(lambda series: gen_trans_status(series = series, rejection_rates_dict = rejection_rates_dict), axis = 1) + trans_data[['transaction_status', 'transaction_error_code']] = trans_data.apply(lambda series: gen_trans_status(series = series, rejection_rates_dict = rejection_rates_dict, rejection_codes = transaction_obj.rejection_codes), result_type = 'expand', axis = 1) - # add transaction status and error codes - trans_data['transaction_error_code'] = gen_trans_error_codes(trans_data = trans_data, trans_status_col = 'transaction_status', rejection_codes = transaction_obj.rejection_codes) - # sort data by transaction date trans_data = trans_data.sort_values(by = 'transaction_date').reset_index(drop = True) return trans_data \ No newline at end of file diff --git a/scripts/utilities/gen_trans_error_codes.py 
b/scripts/utilities/arch/gen_trans_error_codes.py similarity index 100% rename from scripts/utilities/gen_trans_error_codes.py rename to scripts/utilities/arch/gen_trans_error_codes.py diff --git a/scripts/utilities/gen_trans_status.py b/scripts/utilities/gen_trans_status.py index f029939..77eeecd 100644 --- a/scripts/utilities/gen_trans_status.py +++ b/scripts/utilities/gen_trans_status.py @@ -3,7 +3,7 @@ import cons -def gen_trans_status(series, rejection_rates_dict): +def gen_trans_status(series, rejection_rates_dict, rejection_codes): """Generates the transaction status for a pandas series from the transaction level telecom payments data given the rejection rates dictionary from the same data Parameters @@ -12,6 +12,8 @@ def gen_trans_status(series, rejection_rates_dict): A pandas series from the transaction level telecom payments data rejection_rates_dict : dict Rejection rates generated the transaction level telecom payments data + rejection_codes : dict + A dictionary of rejection codes and associated proportions Returns ------- @@ -19,56 +21,46 @@ def gen_trans_status(series, rejection_rates_dict): The transaction status for the pandas series """ # set country code columns - country_code_columns = [ - "registration_country_code", - "ip_country_code", - "card_country_code", - ] + country_code_columns = ["registration_country_code","ip_country_code","card_country_code"] + if series['card_hash'] == series['card_hash']: # add rejections based on crime rates within country codes - if rejection_rates_dict["country_code_trans_reject_rate_dict"][ - np.random.choice(a=series[country_code_columns].dropna().to_list(), size=1)[0] - ] >= random.uniform(0, 1): + if rejection_rates_dict["country_code_trans_reject_rate_dict"][np.random.choice(a=series[country_code_columns].dropna().to_list(), size=1)[0]] >= random.uniform(0, 1): status = "rejected" + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] # add rejections 
based on domain frequencies - elif rejection_rates_dict["domain_email_trans_reject_rate_dict"][ - series["email_domain"] - ] >= random.uniform(0, 1): + elif rejection_rates_dict["domain_email_trans_reject_rate_dict"][series["email_domain"]] >= random.uniform(0, 1): status = "rejected" + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] # add rejections based on inconsistent country codes - elif cons.data_model_inconsistent_country_codes_rejection_rate[ - series[country_code_columns].dropna().nunique() - ] >= random.uniform(0, 1): + elif cons.data_model_inconsistent_country_codes_rejection_rate[series[country_code_columns].dropna().nunique()] >= random.uniform(0, 1): status = "rejected" + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] # add rejections based on shared ips, cards and devices - elif series["device_hash"] == series["device_hash"] and rejection_rates_dict[ - "shared_devices_reject_rate_dict" - ][series["device_hash"]] >= random.uniform(0, 1): + elif series["device_hash"] == series["device_hash"] and rejection_rates_dict["shared_devices_reject_rate_dict"][series["device_hash"]] >= random.uniform(0, 1): status = "rejected" - elif series["ip_hash"] == series["ip_hash"] and rejection_rates_dict[ - "shared_ips_reject_rate_dict" - ][series["ip_hash"]] >= random.uniform(0, 1): + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] + elif series["ip_hash"] == series["ip_hash"] and rejection_rates_dict["shared_ips_reject_rate_dict"][series["ip_hash"]] >= random.uniform(0, 1): status = "rejected" - elif series["card_hash"] == series["card_hash"] and rejection_rates_dict[ - "shared_cards_reject_rate_dict" - ][series["card_hash"]] >= random.uniform(0, 1): + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] + elif series["card_hash"] == 
series["card_hash"] and rejection_rates_dict["shared_cards_reject_rate_dict"][series["card_hash"]] >= random.uniform(0, 1): status = "rejected" + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] # add rejections based on counts of devices, ips and cards - elif rejection_rates_dict["count_devices_reject_rate_dict"][ - series["userid"] - ] >= random.uniform(0, 1): + elif rejection_rates_dict["count_devices_reject_rate_dict"][series["userid"]] >= random.uniform(0, 1): status = "rejected" - elif rejection_rates_dict["count_ips_reject_rate_dict"][ - series["userid"] - ] >= random.uniform(0, 1): + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] + elif rejection_rates_dict["count_ips_reject_rate_dict"][series["userid"]] >= random.uniform(0, 1): status = "rejected" - elif rejection_rates_dict["count_cards_reject_rate_dict"][ - series["userid"] - ] >= random.uniform(0, 1): + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] + elif rejection_rates_dict["count_cards_reject_rate_dict"][series["userid"]] >= random.uniform(0, 1): status = "rejected" + error_code = np.random.choice(a=list(rejection_codes.keys()),p=list(rejection_codes.values()),size=1)[0] # otherwise return successful status else: - status = series["transaction_status"] + status = np.random.choice(a=['successful', 'pending'], size=1, p=[0.98, 0.02])[0] + error_code = np.nan else: status = np.random.choice(a=['successful', 'pending'], size=1, p=[0.98, 0.02])[0] - return status + error_code = np.nan + return [status, error_code]