-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtraining_model.py
60 lines (43 loc) · 1.58 KB
/
training_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
from sklearn.ensemble import RandomForestClassifier
# Load the passenger training data into a DataFrame. The path is relative, so
# 'train.csv' is looked up in the current working directory.
df = pd.read_csv('train.csv')
def get_title(name):
    """Extract the honorific from a name shaped like "Surname, Title. Given".

    The title is the text between the first comma and the following period,
    with surrounding whitespace removed.

    Args:
        name: The full name. Expected to be a string; any other value (for
            example the float NaN pandas uses for a missing cell) is treated
            as having no title.

    Returns:
        The extracted title, or 'Unknown' when the name is not a string,
        contains no period, or contains no comma.
    """
    if not isinstance(name, str) or '.' not in name:
        return 'Unknown'
    pieces = name.split(',')
    # A period without a comma (e.g. "Mr. Smith") has no "Surname," prefix to
    # skip; indexing pieces[1] would raise IndexError, so report 'Unknown'.
    if len(pieces) < 2:
        return 'Unknown'
    return pieces[1].split('.')[0].strip()
# Raw title -> grouped title. Titles not listed here are kept as they are.
# 'the Countess' and 'Lady' are grouped under 'Royalty' only: the previous
# if/elif chain also listed them under 'Mrs', but that branch could never be
# reached for them because the 'Royalty' check came first.
_TITLE_GROUPS = {
    'Capt': 'Officer',
    'Col': 'Officer',
    'Major': 'Officer',
    'Jonkheer': 'Royalty',
    'Don': 'Royalty',
    'the Countess': 'Royalty',
    'Dona': 'Royalty',
    'Lady': 'Royalty',
    'Sir': 'Royalty',
    'Mme': 'Mrs',
    'Mlle': 'Miss',
    'Ms': 'Miss',
}


def replace_titles(x):
    """Collapse a rare title into one of the broader title groups.

    Args:
        x: A mapping-like row (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) that provides a 'Title' entry.

    Returns:
        'Officer', 'Royalty', 'Mrs' or 'Miss' for the titles listed in
        ``_TITLE_GROUPS``; otherwise the original title unchanged.
    """
    title = x['Title']
    return _TITLE_GROUPS.get(title, title)
# --- Feature engineering ---------------------------------------------------
# Derive a 'Title' column from each name, then collapse rare titles into
# broader groups. replace_titles reads the row's 'Title', hence axis=1.
df['Title'] = df['Name'].map(get_title)
df['Title'] = df.apply(replace_titles, axis=1)

# --- Missing values --------------------------------------------------------
# Results are assigned back to the column rather than calling
# df[col].fillna(..., inplace=True): the in-place call acts on a column
# selection, which is deprecated in pandas 2.x and leaves df unchanged when
# Copy-on-Write is active.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Fare'] = df['Fare'].fillna(df['Fare'].median())
df['Embarked'] = df['Embarked'].fillna("S")

# --- Drop columns that are not used as features -----------------------------
df.drop(columns=["Cabin", "Ticket", "Name"], inplace=True)

# --- Encode categorical columns as integers ---------------------------------
# Same reasoning as above: assign the result back instead of replacing in
# place on a column selection. Values missing from a mapping are left
# unchanged by Series.replace.
df['Sex'] = df['Sex'].replace({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].replace({'S': 0, 'C': 1, 'Q': 2})
df['Title'] = df['Title'].replace({
    'Mr': 0,
    'Miss': 1,
    'Mrs': 2,
    'Master': 3,
    'Dr': 4,
    'Rev': 5,
    'Officer': 6,
    'Royalty': 7,
})

# --- Train / test split -----------------------------------------------------
# 'Survived' is the target; 'PassengerId' is excluded from the features.
x = df.drop(['Survived', 'PassengerId'], axis=1)
y = df['Survived']

# 10% of the rows are held out. No random_state is passed, so the split
# differs from run to run. x_test / y_test are not used further in this
# script.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

# --- Fit and persist the model ----------------------------------------------
randomforest = RandomForestClassifier()
randomforest.fit(x_train, y_train)

# Use a context manager so the file is flushed and closed even if pickling
# raises; the bare open() call used before never closed the handle.
with open('titanic_model.sav', 'wb') as model_file:
    pickle.dump(randomforest, model_file)
# In[ ]: