diff --git a/Qlik-Py-Init.bat b/Qlik-Py-Init.bat index 347226a..c600752 100644 --- a/Qlik-Py-Init.bat +++ b/Qlik-Py-Init.bat @@ -16,9 +16,9 @@ python -m pip install --upgrade setuptools pip pip install grpcio grpcio-tools numpy scipy pandas cython pip install pystan==2.17 pip install fbprophet -pip install -U scikit-learn +pip install scikit-learn pip install hdbscan -pip install -U skater +pip install skater==1.1.2 echo. echo Creating a new firewall rule for TCP port 50055... & echo. netsh advfirewall firewall add rule name="Qlik PyTools" dir=in action=allow protocol=TCP localport=50055 diff --git a/Qlik-Py-Init.ps1 b/Qlik-Py-Init.ps1 index 04680be..7665267 100644 --- a/Qlik-Py-Init.ps1 +++ b/Qlik-Py-Init.ps1 @@ -10,9 +10,9 @@ python -m pip install --upgrade setuptools pip pip install grpcio grpcio-tools numpy scipy pandas cython pip install pystan==2.17 pip install fbprophet -pip install -U scikit-learn +pip install scikit-learn pip install hdbscan -pip install -U skater +pip install skater==1.1.2 Write-Output "Creating a new firewall rule for TCP port 50055..." netsh advfirewall firewall add rule name=Qlik-PyTools dir=in action=allow protocol=TCP localport=50055 Write-Output "All done. Run Qlik-Py-Start.bat to start the SSE Extension Service." \ No newline at end of file diff --git a/core/__main__.py b/core/__main__.py index 8caf624..cf89a83 100644 --- a/core/__main__.py +++ b/core/__main__.py @@ -290,7 +290,7 @@ def _prophet(request, context): : :For more information on these parameters go here: https://facebook.github.io/prophet/docs/quick_start.html : - :Additional parameters used are: return, take_log, debug + :Additional parameters used are: return, take_log, debug, load_script : :cap = 1000 : A logistic growth model can be defined using cap and floor. 
Values should be double or integer :changepoint_prior_scale = 0.05 : Decrease if the trend changes are being overfit, increase for underfit @@ -318,16 +318,23 @@ def _prophet(request, context): # Create an instance of the ProphetForQlik class # This will take the request data from Qlik and prepare it for forecasting - predictor = ProphetForQlik(request_list) + predictor = ProphetForQlik(request_list, context) # Calculate the forecast and store in a Pandas series forecast = predictor.predict() + # Check if the response is a DataFrame. + # This occurs when the load_script=true argument is passed in the Qlik expression. + response_is_df = isinstance(forecast, pd.DataFrame) + # Convert the response to a list of rows forecast = forecast.values.tolist() - + # We convert values to type SSE.Dual, and group columns into a iterable - response_rows = [iter([SSE.Dual(numData=row)]) for row in forecast] + if response_is_df: + response_rows = [iter([SSE.Dual(strData=row[0]),SSE.Dual(numData=row[1])]) for row in forecast] + else: + response_rows = [iter([SSE.Dual(numData=row)]) for row in forecast] # Values are then structured as SSE.Rows response_rows = [SSE.Row(duals=duals) for duals in response_rows] @@ -393,7 +400,7 @@ def _prophet_seasonality(request, context): # Create an instance of the ProphetForQlik class # This will take the request data from Qlik and prepare it for forecasting - predictor = ProphetForQlik.init_seasonality(request_list) + predictor = ProphetForQlik.init_seasonality(request_list, context) # Calculate the forecast and store in a Pandas series forecast = predictor.predict() diff --git a/core/_prophet.py b/core/_prophet.py index 264aba3..4e653af 100644 --- a/core/_prophet.py +++ b/core/_prophet.py @@ -27,15 +27,16 @@ class ProphetForQlik: # This variable denotes the unit of time used in Qlik for numerical representation of datetime values qlik_cal_unit = 'D' - def __init__(self, request): + def __init__(self, request, context): """ Class initializer. 
:param request: an iterable sequence of RowData :Sets up the input data frame and parameters based on the request """ - # Set the request variable for this object instance + # Set the request and context variables for this object instance self.request = request + self.context = context # Create a Pandas Data Frame with column ds for the dates and column y for values self.request_df = pd.DataFrame([(row.duals[0].numData, row.duals[1].numData) \ @@ -132,7 +133,7 @@ def __init__(self, request): self._print_log(2) @classmethod - def init_seasonality(cls, request): + def init_seasonality(cls, request, context): """ Alternative initialization method for this class Used when the request contains the timeseries as a contatenated string, repeated for every row @@ -195,7 +196,7 @@ def init_seasonality(cls, request): updated_request = [SSE.BundledRows(rows=request_list)] # Call the default initialization method - instance = ProphetForQlik(updated_request) + instance = ProphetForQlik(updated_request, context) # Handle null value row in the request dataset instance.NaT_df = request_df.loc[request_df.ds.isnull()].copy() @@ -263,7 +264,19 @@ def predict(self): if self.debug: self._print_log(4) - return self.forecast.loc[:,self.result_type] + # If the function was called through the load script we return a Data Frame + if self.load_script: + # Create an additional series to be added to the response with input ds values as strings + ds = self.request_df['ds'].dt.strftime('%Y-%m-%d %r') + # Add the ds column to the output + self.response = pd.concat([ds, self.forecast.loc[:,self.result_type]], axis=1) + + # Send meta data on the response to Qlik + self._send_table_description() + + return self.response + else: + return self.forecast.loc[:,self.result_type] def _set_params(self): """ @@ -283,6 +296,7 @@ def _set_params(self): self.request_row_count = len(self.request_df) + len(self.NaT_df) # Set default values which will be used if an argument is not passed + self.load_script = False 
self.result_type = 'yhat' self.take_log = False self.seasonality = 'yearly' @@ -337,6 +351,11 @@ def _set_params(self): # Make sure the key words are in lower case self.kwargs = {k.lower(): v for k, v in self.kwargs.items()} + # Set the load_script parameter to determine the output format + # Set to 'true' if calling the functions from the load script in the Qlik app + if 'load_script' in self.kwargs: + self.load_script = 'true' == self.kwargs['load_script'].lower() + # Set the return type # Valid values are: yhat, trend, seasonal, seasonalities. # Add _lower or _upper to the series name to get lower or upper limits. @@ -564,6 +583,28 @@ def _forecast(self): self.NaT_df = self.NaT_df.rename({'y': self.result_type}, axis='columns') self.forecast = self.forecast.append(self.NaT_df) + def _send_table_description(self): + """ + Send the table description to Qlik as meta data. + Only used when the SSE is called from the Qlik load script. + """ + + # Set up the table description to send as metadata to Qlik + self.table = SSE.TableDescription() + self.table.name = "ProphetForecast" + self.table.numberOfRows = len(self.response) + + # Set up fields for the table + self.table.fields.add(name="ds") + self.table.fields.add(name=self.result_type, dataType=1) + + if self.debug: + self._print_log(5) + + # Send table description + table_header = (('qlik-tabledescription-bin', self.table.SerializeToString()),) + self.context.send_initial_metadata(table_header) + def _print_log(self, step): """ Output useful information to stdout and the log file if debugging is required. 
@@ -638,6 +679,14 @@ def _print_log(self, step): [f.write("{}\n".format(col)) for col in self.forecast] f.write("\nSAMPLE RESULTS:\n{0} \n\n".format(self.forecast.tail(self.periods).to_string())) f.write("FORECAST RETURNED:\n{0}\n\n".format(self.forecast.loc[:,self.result_type].to_string())) + + elif step == 5: + # Print the table description if the call was made from the load script + sys.stdout.write("\nTABLE DESCRIPTION SENT TO QLIK:\n\n{0} \n\n".format(self.table)) + + # Write the table description to the log file + with open(self.logfile,'a') as f: + f.write("\nTABLE DESCRIPTION SENT TO QLIK:\n\n{0} \n\n".format(self.table)) @staticmethod def timeit(request): diff --git a/docker/Dockerfile v.4.0 b/docker/Dockerfile v.4.0 new file mode 100644 index 0000000..4b9f14a --- /dev/null +++ b/docker/Dockerfile v.4.0 @@ -0,0 +1,35 @@ +# Use an official Python runtime as a parent image +FROM python:3.6.8 + +# Set the working directory to /qlik-py-tools +WORKDIR /qlik-py-tools + +# Copy the current directory contents into the container at /qlik-py-tools +COPY . 
/qlik-py-tools + +# Install dependencies +RUN apt-get update +RUN apt-get install build-essential + +# Upgrade pip and setuptools +RUN python -m pip install --upgrade setuptools pip + +# Install required packages +RUN pip install grpcio grpcio-tools numpy scipy pandas cython +RUN pip install pystan==2.17 +RUN pip install fbprophet +RUN pip install scikit-learn +RUN pip install hdbscan +RUN pip install skater==1.1.2 + +# Copy modified file for skater +COPY ./feature_importance.py /usr/local/lib/python3.6/site-packages/skater-1.1.2-py3.6.egg/skater/core/global_interpretation/ + +# Make port 80 available to the world outside this container +EXPOSE 80 + +# Set the working directory to /qlik-py-tools/core +WORKDIR /qlik-py-tools/core + +# Run __main__.py when the container launches +CMD ["python", "__main__.py"] \ No newline at end of file diff --git a/docs/Prophet.md b/docs/Prophet.md index d293082..c0190dd 100644 --- a/docs/Prophet.md +++ b/docs/Prophet.md @@ -9,6 +9,7 @@ - [Seasonality](#seasonality) - [Holidays](#holidays) - [Use Prophet with your own app](#use-prophet-with-your-own-app) +- [Precalculating forecasts in the load script](#precalculating-forecasts-in-the-load-script) - [Attribution](#attribution) ## Introduction @@ -52,6 +53,7 @@ Any of these arguments can be included in the final string parameter for the Pro | return | The output of the expression | `yhat`, `yhat_upper`, `yhat_lower`, `y_then_yhat`, `y_then_yhat_upper`, `y_then_yhat_lower`, `trend`, `trend_upper`, `trend_lower`, `seasonal`, `seasonal_upper`, `seasonal_lower`, `yearly`, `yearly_upper`, `yearly_lower` & any other column in the forecast output | `yhat` refers to the forecast values. This is the default value. The `y_then_yhat` options allow you to plot the actual values for historical data and forecast values only for future dates. Upper and lower limits are available for each type of output. 
| | freq | The frequency of the time series | `D`, `MS`, `M`, `H`, `T`, `S`, `ms`, `us` | The most common options would be D for Daily, MS for Month Start and M for Month End. The default value is D, however this will mess up results if you provide the values in a different frequency, so always specify the frequency. See the full set of options [here](http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases). | | debug | Flag to output additional information to the terminal and logs | `true`, `false` | Information will be printed to the terminal as well to a log file: `..\qlik-py-env\core\logs\Prophet Log .txt`. Particularly useful is looking at the Request Data Frame to see what you are sending to the algorithm and the Forecast Data Frame to see the possible result columns. | +| load_script | Flag for calling the function from the Qlik load script. | `true`, `false` | Set to `true` if calling the Prophet function from the load script in the Qlik app. This will change the output to a table consisting of two fields: `ds`, which is the datetime dimension passed to Prophet, and the specified return value (`yhat` by default). `ds` is returned as a string in the format `YYYY-MM-DD hh:mm:ss TT`.<br/><br/>This parameter only applies to the `Prophet` function. | | take_log | Take a logarithm of the values before forecasting | `true`, `false` | Default value is `false`. This can be applied when making the time series more stationary might improve forecast values. You can just try both options and compare the results. In either case the values are returned in the original scale. | | cap | A saturating maximum for the forecast | A decimal or integer value e.g. `1000000` | You can apply a logistic growth trend model using this argument. For example when the maximum market size is known. More information [here](https://facebook.github.io/prophet/docs/saturating_forecasts.html). | | floor | A saturating minimum for the forecast | A decimal or integer value e.g. `0` | This argument must be used in combination with a cap. | @@ -401,5 +403,63 @@ PyTools.Prophet_Holidays(if(FORECAST_MONTH <= AddMonths(Max(Total [Accident Mont 'freq=D, return=holidays, lower_window=-$(vHolidayWindow), upper_window=$(vHolidayWindow)') ``` +## Precalculating forecasts in the load script + +The approach explained above provides the forecast in the context of the user's selections in Qlik. This is a powerful user experience and does not require the app author to think of the granularity at which the forecast needs to be generated. + +However, in some cases, you may want to precalculate the forecast for a given set of dimensions, e.g. by product and region. This can be done through the Qlik load script. 
+ +For an example, refer to the [simple sample app](Sample_App_Forecasting_Simple.qvf). + +``` +// Generate forecasts for each value in the Hospital field +FOR EACH vHospital in FieldValueList('Hospital') + + // Load the actual data and arguments to be passed to the Prophet function + // Future periods must be included in the date dimension with NULL values for the measure + temp: + LOAD + [Month Start] as ds, + Attendances as y, + 'freq=M, take_log=true, load_script=true' as args + RESIDENT Sheet1 + WHERE Hospital = '$(vHospital)'; + + // Call the Prophet function and store the results in the Response table + Response: + LOAD + ds, // Datetime is returned as string with format 'YYYY-MM-DD hh:mm:ss TT' + yhat, + '$(vHospital)' as Hospital + Extension PyTools.Prophet(temp{ds, y, args}); + + Drop table temp; + +Next vHospital + +// Add a composite key to the original table. This will be used to link to the forecasts. +Left Join (Sheet1) +Load + Hospital, + [Month Start], + Period, + AutoNumber(Hospital & '|' & Period) as ForecastKey +Resident Sheet1; + +// Create a Forecasts table with the same composite key. +Forecasts: +Load + AutoNumber(Hospital & '|' & Period) as ForecastKey, + [Forecast by Hospital]; +Load + Hospital, + Floor(Date#(ds, 'YYYY-MM-DD hh:mm:ss TT')) as Period, + yhat as [Forecast by Hospital] +Resident Response; + +// Drop the Response table from the final model +Drop table Response; +``` + ## Attribution The data used in the sample apps was obtained from the [Crash Stats Data Extract](https://www.data.vic.gov.au/data/dataset/crash-stats-data-extract) and the [Victorian Health Services Performance](http://performance.health.vic.gov.au/Home/Report.aspx?ReportKey=157) report published by the Victorian State Government. diff --git a/docs/Sample_App_Forecasting_Simple.qvf b/docs/Sample_App_Forecasting_Simple.qvf index bd68816..19a0d18 100644 Binary files a/docs/Sample_App_Forecasting_Simple.qvf and b/docs/Sample_App_Forecasting_Simple.qvf differ