-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_prod_model.py
140 lines (116 loc) · 5.89 KB
/
run_prod_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# run prophet in production mode
import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
import itertools
import warnings
import logging
import fetch_data
warnings.filterwarnings('ignore')
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)
def run_model(hospital: str, horizon_value_arg: int, initial_argument_value_arg: int, cap_type: str):
# fetch the final dataset
df = fetch_data.pull_dataset(hospital)
print('df.shape :\n', df.shape)
print("\ndf data types :\n", df.dtypes)
print("\ndf head :\n", df.head())
print("\ndf head :\n", df.tail())
# sort data df by datetime column ds
df = df.sort_values(by='ds')
print("\n\033[92mhead of sorted data by datetime ds :\n", df.head(), "\033[00m")
print("\n\033[91mtail of sorted data by datetime ds :\n", df.tail(), "\033[00m")
# reset index using column 'ds' and rename the index as 'idx'
df = df.reset_index(drop=True)
print("\n\033[94mhead of reindexed data :\n", df.head(), "\033[00m")
print("\n\033[94mtail of reindexed data :\n", df.tail(), "\033[00m")
df['weekday'] = df['ds'].dt.dayofweek # add additional regressor 'weekday'
# df['month'] = df['ds'].dt.month # add additional regressor 'month'
# df['week'] = df['ds'].dt.isocalendar().week # add additional regressor 'week'
# define function that yields max value of the following datasets: df, future (see further down for future)
if cap_type == 'hard':
cap_factor = 0
elif cap_type == 'soft':
cap_factor = 0.08
else:
raise Exception("wrong 'cap_type' value")
cap = max(df['y']) + round(cap_factor* max(df['y']))
# define max & min values for target variable df['y']
df['cap'] = cap
df['floor'] = 0
# create the parameter grid for hyperparameter tuning
param_grid = {
'growth': ['logistic'],
'seasonality_mode': ['multiplicative'],
'holidays_mode': ['multiplicative'],
'changepoint_prior_scale': [0.01, 0.025, 0.04, 0.05, 0.1, 0.5],
'seasonality_prior_scale': [1.5, 2.2, 4.0, 6.0, 7.0, 8.0, 9.0, 10.0],
'daily_seasonality': [True],
}
# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = [] # Store the RMSEs for each params here
mapes = [] # Store the MAPEs for each params here
# define cross-validation arguments
horizon_value = horizon_value_arg
horizon_argument = str(horizon_value) + ' days'
# initial_argument_value = int(0.921*len(df))
initial_argument_value = initial_argument_value_arg
initial_argument = str(initial_argument_value) + ' days'
period_argument = horizon_argument
# implement cross-validation to evaluate all parameters
for params in all_params:
m = Prophet(**params) # instantiate Prophet using params
m.add_country_holidays(country_name='GR') # add Greek holidays
m.add_regressor('weekday') # add to Prophet the additional regressor 'weekday'
# m.add_regressor('month') # add to Prophet the additional regressor 'month'
# m.add_regressor('week') # add to Prophet the additional regressor 'week'
m.fit(df) # fit model on train-validate set
df_cv = cross_validation(m, initial=initial_argument, period=period_argument, horizon=horizon_argument,
parallel="threads")
df_p = performance_metrics(df_cv, rolling_window=1)
rmses.append(df_p['rmse'].values[0])
mapes.append(round(100 * df_p['mape'].values[0], 2))
# Find the best parameters
pd.set_option('display.max_columns', None)
tuning_results = pd.DataFrame(all_params)
# tuning_results['rmse'] = rmses
tuning_results['mape'] = mapes
print('tuning results :\n', tuning_results)
# print best params
# best_params = all_params[np.argmin(rmses)]
best_params = all_params[np.argmin(mapes)]
print('best params :', best_params)
print('best params dtype:', type(best_params))
print('df_cv for best params :\n', df_cv.head())
# instantiate prophet model based on best parameter set best_params
m_best = Prophet(**best_params).add_regressor('weekday').add_country_holidays(country_name='GR').fit(df)
# create future
future = m_best.make_future_dataframe(periods=horizon_value, freq='D')
# define max & min future values for future
future['cap'] = cap
future['floor'] = 0
print('\nfuture :\n', future.tail(horizon_value))
# add to future additional regressors 'weekday', 'month', 'week'
future['weekday'] = future['ds'].dt.dayofweek
# future['month'] = future['ds'].dt.month
# future['week'] = future['ds'].dt.isocalendar().week
# make the forecast and process its columns
forecast = m_best.predict(future)
forecast['y_hat'] = forecast['yhat']
forecast['ds'] = forecast['ds'].dt.strftime('%Y-%m-%d') # convert column forecast['ds'] to yyyy-mm-dd format
forecast['y_hat'] = round(forecast['y_hat']).astype('int64')
forecast['yhat_lower'] = forecast['yhat_lower'].apply(np.floor).astype('int64')
forecast['yhat_upper'] = forecast['yhat_upper'].apply(np.ceil).astype('int64')
forecast['hospital'] = hospital
forecast['horizon'] = str(horizon_value)
forecast = forecast[['hospital','horizon','ds', 'y_hat', 'yhat_lower', 'yhat_upper']].tail(horizon_value)
forecast.index = np.arange(1, len(forecast) + 1) # reindex forecast starting from 1
print('\nforecast for horizon =', horizon_argument, ' :\n', forecast)
mape = '-'
df['hospital'] = hospital # add column hospital to train-validation set
print('holidays :', m_best.train_holiday_names)
return forecast, mape, horizon_argument, initial_argument, period_argument, best_params, cap, df