-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinear_reg.py
39 lines (26 loc) · 1.31 KB
/
linear_reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
from clean_tabular_data import dframe
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
def Simple_LR(data):
    """Fit a linear regression of price on TF-IDF text features.

    Each of the ``product_name``, ``product_description`` and ``location``
    columns is vectorised with its own ``TfidfVectorizer``; the three
    resulting matrices are stacked side by side (in that order) to form the
    feature matrix. The target is the ``price`` column with ``£`` and ``,``
    stripped and the remainder parsed as ``float``.

    The input frame is not modified.

    Args:
        data: DataFrame providing the ``product_name``,
            ``product_description``, ``location`` and ``price`` columns.
            ``price`` must hold strings (the ``.str`` accessor is used).

    Returns:
        The model's predictions for the first ten rows of the training
        matrix. The same array is also printed.
    """
    text_columns = ('product_name', 'product_description', 'location')

    # One independent vocabulary per column. ``toarray`` gives a plain
    # ndarray, so the blocks can be stacked directly instead of being
    # round-tripped through per-row Python lists stored on the frame.
    feature_blocks = [
        TfidfVectorizer().fit_transform(data[column]).toarray()
        for column in text_columns
    ]
    X = np.hstack(feature_blocks)
    del feature_blocks  # release the per-column copies before fitting

    # The patterns are literal characters, so regex matching is disabled.
    Y = (
        data['price']
        .str.replace('£', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
        .values
    )

    regres = LinearRegression().fit(X, Y)

    # NOTE: these are in-sample predictions (rows the model was fitted on).
    predictions = regres.predict(X[:10])
    print(predictions)
    return predictions
# Script driver: runs at import time, fitting the model on the `dframe`
# imported from clean_tabular_data and binding whatever Simple_LR returns.
outcome = Simple_LR(dframe)
# Bare expression: has no effect when run as a script; it only echoes the
# value in an interactive session (REPL / notebook cell).
outcome