Skip to content

Commit

Permalink
Merge pull request #18 from oislen/dev
Browse files Browse the repository at this point in the history
14 scale timescales
  • Loading branch information
oislen authored Oct 6, 2024
2 parents 81f27b9 + 60c0ef1 commit 630b328
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 8 deletions.
9 changes: 6 additions & 3 deletions scripts/app/ProgrammeParams.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import numpy as np
import cons
from datetime import datetime

class ProgrammeParams():

def __init__(self, n_users = 100, random_seed = None, n_applications = 20000, registration_start_date = '2020-01-01', registration_end_date = '2020-12-31', transaction_start_date = '2021-01-01', transaction_end_date = '2021-12-31'):
def __init__(self, n_users=100, random_seed=None, n_applications=20000, registration_start_date='2020-01-01', registration_end_date='2020-12-31', transaction_start_date='2021-01-01', transaction_end_date='2021-12-31'):
# take programme parameters from class parameters
self.random_seed = random_seed
self.n_users = n_users
self.n_applications = n_applications
self.registration_start_date = registration_start_date
self.registration_end_date = registration_end_date
self.transaction_start_date = transaction_start_date
self.transaction_end_date = transaction_end_date
self.transaction_end_date = transaction_end_date
transaction_start_date_strftime = datetime.strptime(self.transaction_start_date, cons.date_date_strftime)
transaction_end_date_strftime = datetime.strptime(self.transaction_end_date, cons.date_date_strftime)
self.transaction_timescale = ((transaction_end_date_strftime - transaction_start_date_strftime).days + 1) / 365
13 changes: 10 additions & 3 deletions scripts/app/gen_random_telecom_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from objects.User import User
from utilities.gen_random_entity_counts import gen_random_entity_counts

def gen_random_telecom_data(n_users=1, random_seed=None):
def gen_random_telecom_data(n_users=1, random_seed=None, registration_start_date='2020-01-01', registration_end_date='2020-12-31', transaction_start_date='2021-01-01', transaction_end_date='2021-12-31'):
"""Generates random telecommunications data
Parameters
Expand All @@ -28,7 +28,14 @@ def gen_random_telecom_data(n_users=1, random_seed=None):
"""

# initialise programme parameters
programmeparams = ProgrammeParams(n_users=n_users, random_seed=random_seed)
programmeparams = ProgrammeParams(
n_users=n_users,
random_seed=random_seed,
registration_start_date=registration_start_date,
registration_end_date=registration_end_date,
transaction_start_date=transaction_start_date,
transaction_end_date=transaction_end_date
)

# set random seed
random.seed(programmeparams.random_seed)
Expand All @@ -38,7 +45,7 @@ def gen_random_telecom_data(n_users=1, random_seed=None):
user_obj = User(n_user_ids=programmeparams.n_users, start_date=programmeparams.registration_start_date, end_date=programmeparams.registration_end_date)

# generate random entity counts for each user
random_entity_counts = gen_random_entity_counts(user_obj)
random_entity_counts = gen_random_entity_counts(user_obj, transaction_timescale=programmeparams.transaction_timescale)

# generate random entity values
device_obj = Device(n_device_hashes=random_entity_counts['n_devices'].sum())
Expand Down
1 change: 1 addition & 0 deletions scripts/cons.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
unittest_transaction_end_date = '2021-12-31'

# set data model constants
date_date_strftime = "%Y-%m-%d"
data_model_entity_user_ratios = {'card':1.3, 'device':2.5, 'transaction':5.3, 'ip':4.3}
data_model_poisson_params = {'user':{'lambda':20, 'power':1}, 'device':{'lambda':0.2, 'power':2}, 'card':{'lambda':0.1, 'power':2}, 'ip':{'lambda':1.3, 'power':2}, 'application':{'lambda':1, 'power':2}, 'transaction':{'lambda':5, 'power':2}}
data_model_shared_entities_dict = {'ip':0.05, 'card':0.005, 'device':0.01}
Expand Down
2 changes: 1 addition & 1 deletion scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@

# write data to disk
logging.info(f'Writing intermediate user level random telecoms data to: {cons.fpath_randomtelecomusersdata}')
logging.info(f'Writing output trans level random telecoms data to: {cons.fpath_randomtelecomusersdata}')
logging.info(f'Writing output trans level random telecoms data to: {cons.fpath_randomtelecomtransdata}')
user_data.to_parquet(cons.fpath_randomtelecomusersdata, engine='fastparquet')
trans_data.to_csv(cons.fpath_randomtelecomtransdata, index = False)
8 changes: 8 additions & 0 deletions scripts/utilities/commandline_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ def commandline_interface():
parser.add_argument("--n_users", action="store", dest="n_users", type=int, default=100, help="Integer, the number of users to generate random telecom payments data for",)
parser.add_argument("--use_random_seed", action="store", dest="use_random_seed", type=int, default=0, help="Integer, use a set random seed for reproducible results; must be either 0 or 1",)
parser.add_argument("--n_itr", action="store", dest="n_itr", type=int, default=1, help="Integer, number of iterations to run",)
parser.add_argument("--registration_start_date", action="store", dest="registration_start_date", type=str, default="2020-01-01", help="String, the start date for registrations",)
parser.add_argument("--registration_end_date", action="store", dest="registration_end_date", type=str, default="2020-12-31", help="String, the end date for registrations",)
parser.add_argument("--transaction_start_date", action="store", dest="transaction_start_date", type=str, default="2021-01-01", help="String, the start date for transactions",)
parser.add_argument("--transaction_end_date", action="store", dest="transaction_end_date", type=str, default="2021-12-31", help="String, the end date for transactions",)
# create an output dictionary to hold the results
input_params_dict = {}
# extract input arguments
Expand All @@ -32,4 +36,8 @@ def commandline_interface():
input_params_dict["n_users"] = args.n_users
input_params_dict["use_random_seed"] = args.use_random_seed
input_params_dict["n_itr"] = args.n_itr
input_params_dict["registration_start_date"] = args.registration_start_date
input_params_dict["registration_end_date"] = args.registration_end_date
input_params_dict["transaction_start_date"] = args.transaction_start_date
input_params_dict["transaction_end_date"] = args.transaction_end_date
return input_params_dict
6 changes: 5 additions & 1 deletion scripts/utilities/gen_random_entity_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import cons
from utilities.gen_random_poisson_power import gen_random_poisson_power

def gen_random_entity_counts(user_obj):
def gen_random_entity_counts(user_obj, transaction_timescale=1.0):
"""Generates a dataframe of entity counts for all users from a given user object
Parameters
----------
user_obj : User Class
The User class object
transaction_timescale : float
The transaction timescale where 1.0 is a single year of transactions, default is 1.0
Returns
-------
Expand All @@ -26,4 +28,6 @@ def gen_random_entity_counts(user_obj):
random_entity_counts['n_ips'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["ip"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["ip"]["power"])
random_entity_counts['n_transactions'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["transaction"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["transaction"]["power"])
random_entity_counts['n_applications'] = gen_random_poisson_power(lam = cons.data_model_poisson_params["application"]["lambda"], size = user_obj.n_user_ids, power = cons.data_model_poisson_params["application"]["power"])
# scale n transactions by the transaction timescale
random_entity_counts['n_transactions'] = (random_entity_counts['n_transactions'] * transaction_timescale).round().astype(int)
return random_entity_counts

0 comments on commit 630b328

Please sign in to comment.