Skip to content

Commit

Permalink
Merge pull request #22 from nabeel-oz/prophet_load_script
Browse files Browse the repository at this point in the history
Precalculated forecasts
  • Loading branch information
nabeel-oz committed Apr 11, 2019
2 parents de0246e + 954936c commit d830e4b
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 14 deletions.
4 changes: 2 additions & 2 deletions Qlik-Py-Init.bat
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ python -m pip install --upgrade setuptools pip
pip install grpcio grpcio-tools numpy scipy pandas cython
pip install pystan==2.17
pip install fbprophet
pip install -U scikit-learn
pip install scikit-learn
pip install hdbscan
pip install -U skater
pip install skater==1.1.2
echo.
echo Creating a new firewall rule for TCP port 50055... & echo.
netsh advfirewall firewall add rule name="Qlik PyTools" dir=in action=allow protocol=TCP localport=50055
Expand Down
4 changes: 2 additions & 2 deletions Qlik-Py-Init.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ python -m pip install --upgrade setuptools pip
pip install grpcio grpcio-tools numpy scipy pandas cython
pip install pystan==2.17
pip install fbprophet
pip install -U scikit-learn
pip install scikit-learn
pip install hdbscan
pip install -U skater
pip install skater==1.1.2
Write-Output "Creating a new firewall rule for TCP port 50055..."
netsh advfirewall firewall add rule name=Qlik-PyTools dir=in action=allow protocol=TCP localport=50055
Write-Output "All done. Run Qlik-Py-Start.bat to start the SSE Extension Service."
17 changes: 12 additions & 5 deletions core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def _prophet(request, context):
:
:For more information on these parameters go here: https://facebook.github.io/prophet/docs/quick_start.html
:
:Additional parameters used are: return, take_log, debug
:Additional parameters used are: return, take_log, debug, load_script
:
:cap = 1000 : A logistic growth model can be defined using cap and floor. Values should be double or integer
:changepoint_prior_scale = 0.05 : Decrease if the trend changes are being overfit, increase for underfit
Expand Down Expand Up @@ -318,16 +318,23 @@ def _prophet(request, context):

# Create an instance of the ProphetForQlik class
# This will take the request data from Qlik and prepare it for forecasting
predictor = ProphetForQlik(request_list)
predictor = ProphetForQlik(request_list, context)

# Calculate the forecast and store in a Pandas series
forecast = predictor.predict()

# Check if the response is a DataFrame.
# This occurs when the load_script=true argument is passed in the Qlik expression.
response_is_df = isinstance(forecast, pd.DataFrame)

# Convert the response to a list of rows
forecast = forecast.values.tolist()

# We convert values to type SSE.Dual, and group columns into an iterable
response_rows = [iter([SSE.Dual(numData=row)]) for row in forecast]
if response_is_df:
response_rows = [iter([SSE.Dual(strData=row[0]),SSE.Dual(numData=row[1])]) for row in forecast]
else:
response_rows = [iter([SSE.Dual(numData=row)]) for row in forecast]

# Values are then structured as SSE.Rows
response_rows = [SSE.Row(duals=duals) for duals in response_rows]
Expand Down Expand Up @@ -393,7 +400,7 @@ def _prophet_seasonality(request, context):

# Create an instance of the ProphetForQlik class
# This will take the request data from Qlik and prepare it for forecasting
predictor = ProphetForQlik.init_seasonality(request_list)
predictor = ProphetForQlik.init_seasonality(request_list, context)

# Calculate the forecast and store in a Pandas series
forecast = predictor.predict()
Expand Down
59 changes: 54 additions & 5 deletions core/_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ class ProphetForQlik:
# This variable denotes the unit of time used in Qlik for numerical representation of datetime values
qlik_cal_unit = 'D'

def __init__(self, request):
def __init__(self, request, context):
"""
Class initializer.
:param request: an iterable sequence of RowData
:param context: the context object supplied with the request; used to send metadata back to Qlik
:Sets up the input data frame and parameters based on the request
"""

# Set the request variable for this object instance
# Set the request and context variables for this object instance
self.request = request
self.context = context

# Create a Pandas Data Frame with column ds for the dates and column y for values
self.request_df = pd.DataFrame([(row.duals[0].numData, row.duals[1].numData) \
Expand Down Expand Up @@ -132,7 +133,7 @@ def __init__(self, request):
self._print_log(2)

@classmethod
def init_seasonality(cls, request):
def init_seasonality(cls, request, context):
"""
Alternative initialization method for this class
Used when the request contains the timeseries as a concatenated string, repeated for every row
Expand Down Expand Up @@ -195,7 +196,7 @@ def init_seasonality(cls, request):
updated_request = [SSE.BundledRows(rows=request_list)]

# Call the default initialization method
instance = ProphetForQlik(updated_request)
instance = ProphetForQlik(updated_request, context)

# Handle null value row in the request dataset
instance.NaT_df = request_df.loc[request_df.ds.isnull()].copy()
Expand Down Expand Up @@ -263,7 +264,19 @@ def predict(self):
if self.debug:
self._print_log(4)

return self.forecast.loc[:,self.result_type]
# If the function was called through the load script we return a Data Frame
if self.load_script:
# Create an additional series to be added to the response with input ds values as strings
ds = self.request_df['ds'].dt.strftime('%Y-%m-%d %r')
# Add the ds column to the output
self.response = pd.concat([ds, self.forecast.loc[:,self.result_type]], axis=1)

# Send meta data on the response to Qlik
self._send_table_description()

return self.response
else:
return self.forecast.loc[:,self.result_type]

def _set_params(self):
"""
Expand All @@ -283,6 +296,7 @@ def _set_params(self):
self.request_row_count = len(self.request_df) + len(self.NaT_df)

# Set default values which will be used if an argument is not passed
self.load_script = False
self.result_type = 'yhat'
self.take_log = False
self.seasonality = 'yearly'
Expand Down Expand Up @@ -337,6 +351,11 @@ def _set_params(self):
# Make sure the key words are in lower case
self.kwargs = {k.lower(): v for k, v in self.kwargs.items()}

# Set the load_script parameter to determine the output format
# Set to 'true' if calling the functions from the load script in the Qlik app
if 'load_script' in self.kwargs:
self.load_script = 'true' == self.kwargs['load_script'].lower()

# Set the return type
# Valid values are: yhat, trend, seasonal, seasonalities.
# Add _lower or _upper to the series name to get lower or upper limits.
Expand Down Expand Up @@ -564,6 +583,28 @@ def _forecast(self):
self.NaT_df = self.NaT_df.rename({'y': self.result_type}, axis='columns')
self.forecast = self.forecast.append(self.NaT_df)

def _send_table_description(self):
    """
    Describe the response table to Qlik through the initial metadata of the call.

    Builds an SSE.TableDescription named "ProphetForecast" with one row per
    row in self.response and two fields: "ds" and the requested result type.
    The description is kept on self.table so that the debug log can print it.
    This is only called when the function is used from the Qlik load script.
    """

    # Describe the table: its name and the number of rows being returned
    description = SSE.TableDescription()
    description.name = "ProphetForecast"
    description.numberOfRows = len(self.response)

    # Describe the two fields in the same order as the response columns.
    # NOTE(review): dataType=1 is presumably the numeric type in the SSE
    # DataType enum -- confirm against the protocol definition.
    description.fields.add(name="ds")
    description.fields.add(name=self.result_type, dataType=1)

    # Keep the description on the instance; the debug log step reads self.table
    self.table = description

    if self.debug:
        self._print_log(5)

    # The serialized description travels under the 'qlik-tabledescription-bin' key
    metadata = (('qlik-tabledescription-bin', description.SerializeToString()),)
    self.context.send_initial_metadata(metadata)

def _print_log(self, step):
"""
Output useful information to stdout and the log file if debugging is required.
Expand Down Expand Up @@ -638,6 +679,14 @@ def _print_log(self, step):
[f.write("{}\n".format(col)) for col in self.forecast]
f.write("\nSAMPLE RESULTS:\n{0} \n\n".format(self.forecast.tail(self.periods).to_string()))
f.write("FORECAST RETURNED:\n{0}\n\n".format(self.forecast.loc[:,self.result_type].to_string()))

elif step == 5:
# Print the table description if the call was made from the load script
sys.stdout.write("\nTABLE DESCRIPTION SENT TO QLIK:\n\n{0} \n\n".format(self.table))

# Write the table description to the log file
with open(self.logfile,'a') as f:
f.write("\nTABLE DESCRIPTION SENT TO QLIK:\n\n{0} \n\n".format(self.table))

@staticmethod
def timeit(request):
Expand Down
35 changes: 35 additions & 0 deletions docker/Dockerfile v.4.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Use an official Python runtime as a parent image
FROM python:3.6.8

# Set the working directory to /qlik-py-tools
WORKDIR /qlik-py-tools

# Copy the current directory contents into the container at /qlik-py-tools
COPY . /qlik-py-tools

# Install build dependencies.
# update and install share one layer so a stale cached package index is never reused,
# and -y answers the confirmation prompt, which cannot be answered during a build.
# The package lists are removed afterwards as they are not needed at runtime.
RUN apt-get update \
    && apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and setuptools
RUN python -m pip install --upgrade setuptools pip

# Install required packages
# pystan and skater are pinned to the same versions as the Windows init scripts
RUN pip install grpcio grpcio-tools numpy scipy pandas cython
RUN pip install pystan==2.17
RUN pip install fbprophet
RUN pip install scikit-learn
RUN pip install hdbscan
RUN pip install skater==1.1.2

# Copy modified file for skater
COPY ./feature_importance.py /usr/local/lib/python3.6/site-packages/skater-1.1.2-py3.6.egg/skater/core/global_interpretation/

# Make port 80 available to the world outside this container
# NOTE(review): the Windows init scripts open TCP port 50055 for this service;
# confirm that __main__.py is listening on the port exposed here.
EXPOSE 80

# Set the working directory to /qlik-py-tools/core
WORKDIR /qlik-py-tools/core

# Run __main__.py when the container launches
CMD ["python", "__main__.py"]
60 changes: 60 additions & 0 deletions docs/Prophet.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- [Seasonality](#seasonality)
- [Holidays](#holidays)
- [Use Prophet with your own app](#use-prophet-with-your-own-app)
- [Precalculating forecasts in the load script](#precalculating-forecasts-in-the-load-script)
- [Attribution](#attribution)

## Introduction
Expand Down Expand Up @@ -52,6 +53,7 @@ Any of these arguments can be included in the final string parameter for the Pro
| return | The output of the expression | `yhat`, `yhat_upper`, `yhat_lower`, `y_then_yhat`, `y_then_yhat_upper`, `y_then_yhat_lower`, `trend`, `trend_upper`, `trend_lower`, `seasonal`, `seasonal_upper`, `seasonal_lower`, `yearly`, `yearly_upper`, `yearly_lower` & any other column in the forecast output | `yhat` refers to the forecast values. This is the default value. The `y_then_yhat` options allow you to plot the actual values for historical data and forecast values only for future dates. Upper and lower limits are available for each type of output. |
| freq | The frequency of the time series | `D`, `MS`, `M`, `H`, `T`, `S`, `ms`, `us` | The most common options would be D for Daily, MS for Month Start and M for Month End. The default value is D, however this will mess up results if you provide the values in a different frequency, so always specify the frequency. See the full set of options [here](http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). |
| debug | Flag to output additional information to the terminal and logs | `true`, `false` | Information will be printed to the terminal as well as to a log file: `..\qlik-py-env\core\logs\Prophet Log <n>.txt`. It is particularly useful to look at the Request Data Frame to see what you are sending to the algorithm and the Forecast Data Frame to see the possible result columns. |
| load_script | Flag for calling the function from the Qlik load script. | `true`, `false` | Set to `true` if calling the Prophet function from the load script in the Qlik app. This will change the output to a table consisting of two fields; `ds` which is the datetime dimension passed to Prophet, and the specified return value (`yhat` by default). `ds` is returned as a string in the format `YYYY-MM-DD hh:mm:ss TT`.<br/><br/>This parameter only applies to the `Prophet` function. |
| take_log | Take a logarithm of the values before forecasting | `true`, `false` | Default value is `false`. This can be applied when making the time series more stationary might improve forecast values. You can just try both options and compare the results. In either case the values are returned in the original scale. |
| cap | A saturating maximum for the forecast | A decimal or integer value e.g. `1000000` | You can apply a logistic growth trend model using this argument. For example when the maximum market size is known. More information [here](https://facebook.github.io/prophet/docs/saturating_forecasts.html). |
| floor | A saturating minimum for the forecast | A decimal or integer value e.g. `0` | This argument must be used in combination with a cap. |
Expand Down Expand Up @@ -401,5 +403,63 @@ PyTools.Prophet_Holidays(if(FORECAST_MONTH <= AddMonths(Max(Total [Accident Mont
'freq=D, return=holidays, lower_window=-$(vHolidayWindow), upper_window=$(vHolidayWindow)')
```

## Precalculating forecasts in the load script

The approach explained above provides the forecast in the context of the user's selections in Qlik. This is a powerful user experience and does not require the app author to think of the granularity at which the forecast needs to be generated.

However, in some cases, you may want to precalculate the forecast for a given set of dimensions, e.g. by product and region. This can be done through the Qlik load script.

For an example, refer to the [simple sample app](Sample_App_Forecasting_Simple.qvf).

```
// Generate forecasts for each value in the Hospital field
FOR EACH vHospital in FieldValueList('Hospital')
// Load the actual data and arguments to be passed to the Prophet function
// Future periods must be included in the date dimension with NULL values for the measure
temp:
LOAD
[Month Start] as ds,
Attendances as y,
'freq=M, take_log=true, load_script=true' as args
RESIDENT Sheet1
WHERE Hospital = '$(vHospital)';
// Call the Prophet function and store the results in the Response table
Response:
LOAD
ds, // Datetime is returned as string with format 'YYYY-MM-DD hh:mm:ss TT'
yhat,
'$(vHospital)' as Hospital
Extension PyTools.Prophet(temp{ds, y, args});
Drop table temp;
Next vHospital
// Add a composite key to the original table. This will be used to link to the forecasts.
Left Join (Sheet1)
Load
Hospital,
[Month Start],
Period,
AutoNumber(Hospital & '|' & Period) as ForecastKey
Resident Sheet1;
// Create a Forecasts table with the same composite key.
Forecasts:
Load
AutoNumber(Hospital & '|' & Period) as ForecastKey,
[Forecast by Hospital];
Load
Hospital,
Floor(Date#(ds, 'YYYY-MM-DD hh:mm:ss TT')) as Period,
yhat as [Forecast by Hospital]
Resident Response;
// Drop the Response table from the final model
Drop table Response;
```

## Attribution
The data used in the sample apps was obtained from the [Crash Stats Data Extract](https://www.data.vic.gov.au/data/dataset/crash-stats-data-extract) and the [Victorian Health Services Performance](http://performance.health.vic.gov.au/Home/Report.aspx?ReportKey=157) report published by the Victorian State Government.
Binary file modified docs/Sample_App_Forecasting_Simple.qvf
Binary file not shown.

0 comments on commit d830e4b

Please sign in to comment.