Skip to content

Commit

Permalink
Updates for issues #34 and #29
Browse files Browse the repository at this point in the history
Fix for machine learning where no scaling is required.
Additional parameters for Prophet.
  • Loading branch information
Nabeel committed Jul 10, 2019
1 parent 8b78d04 commit 4ecc928
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 5 deletions.
8 changes: 6 additions & 2 deletions core/_machine_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def fit(self, X, y=None, features=None, retrain=False):

self.__init__(features)

# Set up an empty data frame for data to be scaled
scale_df = pd.DataFrame()

if self.ohe:
# Get a subset of the data that requires one hot encoding
ohe_df = X[self.ohe_meta.index.tolist()]
Expand Down Expand Up @@ -305,6 +308,7 @@ def transform(self, X, y=None):
"""

X_transform = None
scale_df = pd.DataFrame()

if self.ohe:
# Get a subset of the data that requires one hot encoding
Expand Down Expand Up @@ -561,7 +565,7 @@ def _print_log(self, step, **kwargs):
f.write("Fit tfidf_df shape:{0}\nSample Data:\n{1}\n\n".format(kwargs['tfidf_df'].shape, kwargs['tfidf_df'].head()))

try:
if len(self.scale_df) > 0:
if len(kwargs['scale_df']) > 0:
sys.stdout.write("Fit scale_df shape:{0}\nSample Data:\n{1}\n\n".format(kwargs['scale_df'].shape, kwargs['scale_df'].head()))

with open(self.log,'a', encoding='utf-8') as f:
Expand Down Expand Up @@ -595,7 +599,7 @@ def _print_log(self, step, **kwargs):
f.write("Transform tfidf_df shape:{0}\nSample Data:\n{1}\n\n".format(kwargs['tfidf_df'].shape, kwargs['tfidf_df'].head()))

try:
if len(self.scale_df) > 0:
if len(kwargs['scale_df']) > 0:
sys.stdout.write("Transform scale_df shape:{0}\nSample Data:\n{1}\n\n".format(kwargs['scale_df'].shape, kwargs['scale_df'].head()))

with open(self.log,'a', encoding='utf-8') as f:
Expand Down
51 changes: 48 additions & 3 deletions core/_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def predict(self):
if self.name is not None and len(self.add_seasonality_kwargs) > 0:
self.model.add_seasonality(**self.add_seasonality_kwargs)

self.model.fit(self.input_df)
self.model.fit(self.input_df, **self.fit_kwargs)

# Create a data frame for future values
self.future_df = self.model.make_future_dataframe(**self.make_kwargs)
Expand Down Expand Up @@ -320,6 +320,11 @@ def _set_params(self):
self.mode = None
self.seasonality_prior_scale = None
self.holidays_prior_scale = None
self.mcmc_samples = None
self.seed = None
self.n_changepoints = None
self.changepoint_range = None
self.uncertainty_samples = None
self.is_seasonality_request = False
self.weekly_start = 6 # Defaulting to a Monday start for the week as used in Qlik
self.yearly_start = 0
Expand Down Expand Up @@ -447,6 +452,34 @@ def _set_params(self):
# Reducing this parameter dampens holiday effects. Default is 10, which provides very little regularization.
if 'holidays_prior_scale' in self.kwargs:
self.holidays_prior_scale = utils.atof(self.kwargs['holidays_prior_scale'])

# Set the number of MCMC samples.
# If greater than 0, Prophet will do full Bayesian inference with the specified number of MCMC samples.
# If 0, Prophet will do MAP estimation. Default is 0.
if 'mcmc_samples' in self.kwargs:
self.mcmc_samples = utils.atoi(self.kwargs['mcmc_samples'])

# Random seed that can be used to control stochasticity.
# Used for setting the numpy random seed used in predict and also for pystan when using mcmc_samples>0.
if 'random_seed' in self.kwargs:
self.seed = utils.atoi(self.kwargs['random_seed'])

# Set the random seed for numpy
np.random.seed(self.seed)

# Number of potential changepoints to include. Default value is 25.
# Potential changepoints are selected uniformly from the first `changepoint_range` proportion of the history.
if 'n_changepoints' in self.kwargs:
self.n_changepoints = utils.atoi(self.kwargs['n_changepoints'])

# Proportion of history in which trend changepoints will be estimated.
# Defaults to 0.8 for the first 80%.
if 'changepoint_range' in self.kwargs:
self.changepoint_range = utils.atof(self.kwargs['changepoint_range'])

# Number of simulated draws used to estimate uncertainty intervals.
if 'uncertainty_samples' in self.kwargs:
self.uncertainty_samples = utils.atoi(self.kwargs['uncertainty_samples'])

# Set the weekly start for 'weekly' seasonality requests
# Default week start is 0 which represents Sunday. Add offset as required.
Expand All @@ -468,16 +501,18 @@ def _set_params(self):
if 'upper_window' in self.kwargs:
self.upper_window = utils.atoi(self.kwargs['upper_window'])

# Create dictionary of arguments for the Prophet(), make_future_dataframe() and add_seasonality() functions
# Create dictionary of arguments for the Prophet(), make_future_dataframe(), add_seasonality() and fit() functions
self.prophet_kwargs = {}
self.make_kwargs = {}
self.add_seasonality_kwargs = {}
self.fit_kwargs = {}

# Populate the parameters in the corresponding dictionary:

# Set up a list of possible key word arguments for the Prophet() function
prophet_params = ['seasonality_mode', 'growth', 'changepoint_prior_scale', 'interval_width',\
'seasonality_prior_scale', 'holidays_prior_scale']
'seasonality_prior_scale', 'holidays_prior_scale', 'mcmc_samples', 'n_changepoints',\
'changepoint_range', 'uncertainty_samples']

# Create dictionary of key word arguments for the Prophet() function
self.prophet_kwargs = self._populate_dict(prophet_params)
Expand All @@ -493,6 +528,14 @@ def _set_params(self):

# Create dictionary of key word arguments for the add_seasonality() function
self.add_seasonality_kwargs = self._populate_dict(seasonality_params)

# Pass the random seed to the fit method if MCMC is being used
if self.mcmc_samples is not None and self.mcmc_samples > 0:
# Set up a list of possible key word arguments for the fit() function
fit_params = ['seed']
# Create dictionary of key word arguments for the fit() function
self.fit_kwargs = self._populate_dict(fit_params)


def _populate_dict(self, params):
"""
Expand Down Expand Up @@ -636,6 +679,7 @@ def _print_log(self, step):
sys.stdout.write("Instance creation parameters: {0}\n\n".format(self.prophet_kwargs))
sys.stdout.write("Make future data frame parameters: {0}\n\n".format(self.make_kwargs))
sys.stdout.write("Add seasonality parameters: {0}\n\n".format(self.add_seasonality_kwargs))
sys.stdout.write("Fit parameters: {0}\n\n".format(self.fit_kwargs))
sys.stdout.write("REQUEST DATA FRAME: {0} rows x cols\n\n".format(self.request_df.shape))
sys.stdout.write("{0} \n\n".format(self.request_df.to_string()))
if len(self.NaT_df) > 0:
Expand All @@ -653,6 +697,7 @@ def _print_log(self, step):
f.write("Instance creation parameters: {0}\n\n".format(self.prophet_kwargs))
f.write("Make future data frame parameters: {0}\n\n".format(self.make_kwargs))
f.write("Add seasonality parameters: {0}\n\n".format(self.add_seasonality_kwargs))
f.write("Fit parameters: {0}\n\n".format(self.fit_kwargs))
f.write("REQUEST DATA FRAME: {0} rows x cols\n\n".format(self.request_df.shape))
f.write("{0} \n\n".format(self.request_df.to_string()))
if len(self.NaT_df) > 0:
Expand Down
5 changes: 5 additions & 0 deletions docs/Prophet.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,15 @@ Any of these arguments can be included in the final string parameter for the Pro
| debug | Flag to output additional information to the terminal and logs | `true`, `false` | Information will be printed to the terminal as well as to a log file: `..\qlik-py-env\core\logs\Prophet Log <n>.txt`. Particularly useful is looking at the Request Data Frame to see what you are sending to the algorithm and the Forecast Data Frame to see the possible result columns. |
| load_script | Flag for calling the function from the Qlik load script. | `true`, `false` | Set to `true` if calling the Prophet function from the load script in the Qlik app. This will change the output to a table consisting of two fields; `ds` which is the datetime dimension passed to Prophet, and the specified return value (`yhat` by default). `ds` is returned as a string in the format `YYYY-MM-DD hh:mm:ss TT`.<br/><br/>This parameter only applies to the `Prophet` function. |
| take_log | Take a logarithm of the values before forecasting | `true`, `false` | Default value is `false`. This can be applied when making the time series more stationary might improve forecast values. You can just try both options and compare the results. In either case the values are returned in the original scale. |
| random_seed | An integer to control some of the stochasticity in the model | An integer value e.g. `42`, `1000` | The random seed can be used to make uncertainty intervals for predictions deterministic and repeatable. If using `mcmc_samples` > 0 this also applies to MCMC sampling. However, there may still be small variances in results from the model. More info [here](https://github.com/facebook/prophet/issues/849). |
| cap | A saturating maximum for the forecast | A decimal or integer value e.g. `1000000` | You can apply a logistic growth trend model using this argument. For example when the maximum market size is known. More information [here](https://facebook.github.io/prophet/docs/saturating_forecasts.html). |
| floor | A saturating minimum for the forecast | A decimal or integer value e.g. `0` | This argument must be used in combination with a cap. |
| changepoint_prior_scale | A parameter to adjust the trend flexibility | A decimal value e.g. `0.05` | If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility), you can try adjusting this parameter. The default value is `0.05`. Increasing it will make the trend more flexible. Decreasing it will make the trend less flexible. More information [here](https://facebook.github.io/prophet/docs/trend_changepoints.html). |
| n_changepoints | Number of potential changepoints to include | An integer value e.g. `50` | This number of potential changepoints are selected uniformly from the first `changepoint_range` proportion of the history. The default value is `25`. |
| changepoint_range | Proportion of history in which trend changepoints will be estimated | A decimal value less than 1 e.g. `0.9` | The default value is `0.8`, i.e. potential changepoints are only placed within the first 80% of the history. |
| interval_width | The width of the uncertainty intervals | A decimal value e.g. `0.8` | The default value is `0.8` (80%). More information [here](https://facebook.github.io/prophet/docs/uncertainty_intervals.html). |
| uncertainty_samples | Number of simulated draws used to estimate uncertainty intervals | An integer value e.g. `1000` | The default value is `1000`. |
| mcmc_samples | Set the number of MCMC samples | An integer value e.g. `1000` | If greater than 0, Prophet will do full Bayesian inference with the specified number of MCMC samples. If 0, Prophet will do MAP estimation. The default value is `0`. |
| seasonality_mode | Use additive or multiplicative model for seasonality. | `additive`, `multiplicative` | By default Prophet fits additive seasonalities, meaning the effect of the seasonality is added to the trend to get the forecast. If the seasonality is not a constant additive factor as assumed by Prophet, rather it grows with the trend you can set this parameter to `multiplicative`. More information [here](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html). |
| add_seasonality | Additional seasonality to be considered in the forecast. | A string value which represents the name of the seasonality e.g. `monthly` | Prophet will by default fit weekly and yearly seasonalities, if the time series is more than two cycles long. It will also fit daily seasonality for a sub-daily time series. You can add other seasonalities (monthly, quarterly, hourly) using this parameter. More information [here](https://facebook.github.io/prophet/docs/seasonality_and_holiday_effects.html). |
| add_seasonality_mode | Use additive or multiplicative model for the additional seasonality. | `additive`, `multiplicative` | See the `seasonality_mode` parameter above. If the additional seasonality requires a different mode you can use this parameter. More information [here](https://facebook.github.io/prophet/docs/multiplicative_seasonality.html). |
Expand Down

0 comments on commit 4ecc928

Please sign in to comment.