-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscore.py
87 lines (78 loc) · 3.42 KB
/
score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Databricks notebook source
import json
from pyspark.ml.pipeline import PipelineModel
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DoubleType
# COMMAND ----------
# MAGIC %run "/Workspace/Users/n01606417@humber.ca/etl_and_ml_project/utils/Common Variables"
# COMMAND ----------
model = PipelineModel.load(f"{model_folder_path}/ml_pipeline")
model
# COMMAND ----------
def make_prediction(model, raw_data):
data = json.loads(raw_data)
schema = StructType([
StructField('is_best_seller', IntegerType(), True),
StructField('is_prime', IntegerType(), True),
StructField('offers_count', IntegerType(), True),
StructField('num_ratings', DoubleType(), True),
StructField('original_price', DoubleType(), True),
StructField('price', DoubleType(), True),
StructField('star_rating', DoubleType(), True),
StructField('sales_volume', IntegerType(), True),
StructField('discount', DoubleType(), True),
StructField('profit', DoubleType(), True),
StructField('discount_percentage', DoubleType(), True),
StructField('profit_percentage', DoubleType(), True),
StructField('product_title_Hanes', IntegerType(), True),
StructField('product_title_adidas', IntegerType(), True),
StructField('product_title_Amazon_Essentials', IntegerType(), True),
StructField('product_title_Dokotoo', IntegerType(), True),
StructField('product_title_Carhartt', IntegerType(), True),
StructField('product_title_SOJOS', IntegerType(), True),
StructField('product_title_AUTOMET', IntegerType(), True),
StructField('product_title_Fruit_of_the_Loom', IntegerType(), True),
StructField('product_title_Skechers', IntegerType(), True),
StructField('product_title_Trendy_Queen', IntegerType(), True),
StructField('product_title_Wrangler_Authentics', IntegerType(), True),
StructField("product_title_Levi's", IntegerType(), True),
StructField('product_title_PRETTYGARDEN', IntegerType(), True),
StructField('product_title_Gildan', IntegerType(), True),
StructField('product_title_Lee', IntegerType(), True),
StructField('product_title_others', IntegerType(), True)
])
input_df = spark.createDataFrame(data, schema=schema)
predictions = model.transform(input_df)
return predictions.select("prediction").toPandas().to_json(orient="records")
# COMMAND ----------
# raw_data = {
# "is_best_seller": 1,
# "is_prime": 1,
# "offers_count": 1,
# "num_ratings": 4.5,
# "original_price": 100.0,
# "price": 50.0,
# "star_rating": 4.5,
# "sales_volume": 1000,
# "discount": 50.0,
# "profit": 30.0,
# "discount_percentage": 50.0,
# "profit_percentage": 50.0,
# "product_title_Hanes": 1,
# "product_title_adidas": 0,
# "product_title_Amazon_Essentials": 0,
# "product_title_Dokotoo": 0,
# "product_title_Carhartt": 0,
# "product_title_SOJOS": 0,
# "product_title_AUTOMET": 0,
# "product_title_Fruit_of_the_Loom": 0,
# "product_title_Skechers": 0,
# "product_title_Trendy_Queen": 0,
# "product_title_Wrangler_Authentics": 0,
# "product_title_Levi's": 0,
# "product_title_PRETTYGARDEN": 0,
# "product_title_Gildan": 0,
# "product_title_Lee": 0,
# "product_title_others": 1
# }
# raw_data = json.dumps([raw_data])
# make_prediction(model,raw_data)