-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhouse_price_prediction.py
173 lines (123 loc) · 6.48 KB
/
house_price_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#
# house_price_prediction.py
#
# This is a very simple prediction of house prices based on house size, implemented
# in Tensorflow. This code is part of Pluralsight's course "Tensorflow: Getting Started"
#
import tensorflow as tf
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.animation as animation
# Synthetic feature: one random size per house, drawn from [1000, 3500) sq ft
num_house = 160
np.random.seed(42)
house_size = np.random.randint(1000, 3500, num_house)

# Synthetic target: 100 per sq ft plus integer noise drawn from [20000, 70000).
# The generator is re-seeded with the same seed before the noise is drawn.
np.random.seed(42)
price_noise = np.random.randint(20000, 70000, num_house)
house_price = 100.0 * house_size + price_noise

# Scatter-plot the generated house prices against the house sizes
plt.plot(house_size, house_price, "bx")  # "bx" = blue x markers
plt.xlabel("Size")
plt.ylabel("Price")
plt.show()
# You need to normalize values to prevent under/overflows.
def normalize(array):
    """Standardize values to zero mean and unit standard deviation.

    Args:
        array: A NumPy array or any array-like (list, tuple) of numbers.

    Returns:
        A NumPy array holding ``(array - mean) / std``. When every value is
        identical the standard deviation is zero; the result is then all
        zeros instead of the NaN a 0/0 division would produce.
    """
    values = np.asarray(array)
    spread = values.std()
    if spread == 0:
        # Constant input: divide by 1 so the centred (all-zero) values stay
        # finite rather than turning into NaN with a RuntimeWarning.
        spread = 1.0
    return (values - values.mean()) / spread
# Number of training samples: the first 70% of the data. Taking a prefix is
# acceptable because the house sizes were generated in random order.
num_train_samples = math.floor(num_house * 0.7)

# Training data, raw and normalized
train_house_size = np.asarray(house_size[:num_train_samples])
train_price = np.asarray(house_price[:num_train_samples])
train_house_size_norm = normalize(train_house_size)
train_price_norm = normalize(train_price)

# Test data. It is scaled with the TRAINING mean and standard deviation so it
# lives in the same normalized space in which size_factor and price_offset are
# learned; normalizing it with its own statistics would make the learned
# parameters inapplicable to it.
test_house_size = np.asarray(house_size[num_train_samples:])
test_house_price = np.asarray(house_price[num_train_samples:])
test_house_size_norm = (test_house_size - train_house_size.mean()) / train_house_size.std()
test_house_price_norm = (test_house_price - train_price.mean()) / train_price.std()
# Graph inputs: TensorFlow placeholders that receive the house sizes and
# prices each time the graph is run
tf_house_size = tf.placeholder(dtype=tf.float32, name='house_size')
tf_price = tf.placeholder(dtype=tf.float32, name='price')

# Trainable parameters of the line: price = size_factor * size + price_offset.
# Each one starts from its own draw of the standard normal distribution
# (size_factor is drawn first, then price_offset).
initial_size_factor = np.random.randn()
initial_price_offset = np.random.randn()
tf_size_factor = tf.Variable(initial_size_factor, name='size_factor')
tf_price_offset = tf.Variable(initial_price_offset, name='price_offset')

# Prediction op, built from the TensorFlow multiply and add operations
scaled_size = tf.multiply(tf_size_factor, tf_house_size)
tf_price_pred = tf.add(scaled_size, tf_price_offset)

# Loss: sum of squared errors divided by twice the number of training samples
squared_error = tf.pow(tf_price_pred - tf_price, 2)
tf_cost = tf.reduce_sum(squared_error) / (2 * num_train_samples)

# Step size that gradient descent uses on each update
learning_rate = 0.1

# Training op: one gradient-descent step that reduces tf_cost
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = gradient_descent.minimize(tf_cost)

# Op that initializes every variable defined above
init = tf.global_variables_initializer()
# Launch the graph in the session
with tf.Session() as sess:
    sess.run(init)

    # How often to report/record progress, and how many passes over the data
    display_every = 2
    num_training_iter = 50

    # One recorded (size_factor, price_offset) pair per progress report;
    # these drive the animation at the end of the script.
    fit_num_plots = math.floor(num_training_iter / display_every)
    fit_size_factor = np.zeros(fit_num_plots)
    fit_price_offsets = np.zeros(fit_num_plots)
    fit_plot_idx = 0

    # Feed used whenever the cost over the whole training set is evaluated
    train_feed = {tf_house_size: train_house_size_norm, tf_price: train_price_norm}

    for iteration in range(num_training_iter):
        # Fit all training data: one gradient-descent step per sample
        for (x, y) in zip(train_house_size_norm, train_price_norm):
            sess.run(optimizer, feed_dict={tf_house_size: x, tf_price: y})

        # Display current status
        if (iteration + 1) % display_every == 0:
            # Fetch the cost and both parameters in a single run instead of
            # issuing a separate sess.run for every value.
            c, size_factor, price_offset = sess.run(
                [tf_cost, tf_size_factor, tf_price_offset], feed_dict=train_feed)
            print("iteration #:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(c),
                  "size_factor=", size_factor, "price_offset=", price_offset)

            # Save the fitted size_factor and price_offset to allow animation
            # of the learning process
            fit_size_factor[fit_plot_idx] = size_factor
            fit_price_offsets[fit_plot_idx] = price_offset
            fit_plot_idx += 1

    print('Optimization Finished!')
    training_cost, final_size_factor, final_price_offset = sess.run(
        [tf_cost, tf_size_factor, tf_price_offset], feed_dict=train_feed)
    print('Trained cost=', training_cost, 'size_factor=', final_size_factor,
          'price_offset=', final_price_offset, '\n')

    # Values used to normalize the training data, so the fitted line can be
    # denormalized back to the original scale
    train_house_size_mean = train_house_size.mean()
    train_house_size_std = train_house_size.std()
    train_price_mean = train_price.mean()
    train_price_std = train_price.std()

    # x coordinates of the fitted line in original units; they do not depend
    # on the fitted parameters, so they are computed once.
    line_sizes = train_house_size_norm * train_house_size_std + train_house_size_mean

    def denormalized_prices(size_factor, price_offset):
        """Return the line's predicted training prices in original units."""
        return (size_factor * train_house_size_norm + price_offset) * train_price_std + train_price_mean

    # Plot the training data, the test data and the learned regression line
    plt.rcParams["figure.figsize"] = (10, 8)
    plt.figure()
    plt.ylabel('Price')
    plt.xlabel('Size (sq.ft)')
    plt.plot(train_house_size, train_price, 'go', label='Training data')
    plt.plot(test_house_size, test_house_price, 'mo', label='Testing data')
    plt.plot(line_sizes, denormalized_prices(final_size_factor, final_price_offset),
             label='Learned Regression')
    plt.legend(loc='upper left')
    plt.show()

    # Second figure: animate how gradient descent successively adjusted
    # size_factor and price_offset towards the best-fit line.
    # figure.figsize was already set to (10, 8) above, before this figure is
    # created, so it does not need to be set again here.
    fig, ax = plt.subplots()
    line, = ax.plot(house_size, house_price)
    plt.title('Gradient Descent Fitting Regression Line')
    plt.ylabel('Price')
    plt.xlabel('Size (sq.ft)')
    plt.plot(train_house_size, train_price, 'go', label='Training data')
    plt.plot(test_house_size, test_house_price, 'mo', label='Testing data')

    def animate(i):
        """Draw the line recorded at progress report number i."""
        line.set_data(line_sizes, denormalized_prices(fit_size_factor[i], fit_price_offsets[i]))
        return line,

    # Init only required for blitting to give a clean slate
    def initAnim():
        """Reset the animated line to an empty line."""
        # Clear x and y together. animate() replaces the x data with the
        # training-sized array, so resetting only y to one value per house
        # would leave x and y with different lengths if matplotlib runs this
        # init function again after frames were drawn (e.g. on a redraw).
        line.set_data([], [])
        return line,

    ani = animation.FuncAnimation(fig, animate, frames=np.arange(0, fit_plot_idx),
                                  init_func=initAnim, interval=1000, blit=True)
    plt.show()