forked from benhamner/JobSalaryPrediction
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_io.py
46 lines (38 loc) · 1.29 KB
/
data_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import csv
import json
import numpy as np
import os
import pandas as pd
import pickle
def get_paths():
    """Load the path settings from ``Settings.json`` in the working directory.

    Every value in the JSON object is passed through ``os.path.expandvars`` so
    that environment variable references such as ``$DATA_DIR`` are resolved.

    Returns:
        dict: setting name -> path string with environment variables expanded.
    """
    # Use a context manager so the settings file handle is closed promptly
    # instead of being left for the garbage collector.
    with open("Settings.json") as settings_file:
        paths = json.load(settings_file)
    return {key: os.path.expandvars(value) for key, value in paths.items()}
def identity(x):
    """Return ``x`` exactly as it was passed in.

    This module uses it as a no-op pandas converter for text columns.
    """
    unchanged = x
    return unchanged
# Free-text columns are routed through the no-op ``identity`` converter.
# Per the original note, for pandas >= 10.1 (presumably 0.10.1 -- confirm)
# this triggers these columns to be parsed as strings.
converters = dict.fromkeys(
    ("FullDescription", "Title", "LocationRaw", "LocationNormalized"),
    identity,
)
def get_train_df():
    """Read the training CSV into a pandas DataFrame.

    The file location is the ``train_data_path`` entry returned by
    ``get_paths()``; the module-level ``converters`` are applied on read.
    """
    settings = get_paths()
    return pd.read_csv(settings["train_data_path"], converters=converters)
def get_valid_df():
    """Read the validation CSV into a pandas DataFrame.

    The file location is the ``valid_data_path`` entry returned by
    ``get_paths()``; the module-level ``converters`` are applied on read.
    """
    csv_location = get_paths()["valid_data_path"]
    frame = pd.read_csv(csv_location, converters=converters)
    return frame
def save_model(model):
    """Pickle ``model`` to the ``model_path`` location from the settings.

    Args:
        model: any picklable object (typically a fitted estimator).
    """
    out_path = get_paths()["model_path"]
    # Pickle streams are bytes: the file must be opened in binary mode
    # (text mode raises TypeError on Python 3). The context manager makes
    # sure the data is flushed and the handle closed before returning.
    with open(out_path, "wb") as model_file:
        pickle.dump(model, model_file)
def load_model():
    """Load and return the object pickled at the ``model_path`` setting.

    Returns:
        The unpickled object stored in the model file.
    """
    in_path = get_paths()["model_path"]
    # SECURITY: unpickling can execute arbitrary code. Only load model files
    # that come from a trusted source.
    # Binary mode is required for pickle data (text mode fails on Python 3),
    # and the context manager closes the handle once loading finishes.
    with open(in_path, "rb") as model_file:
        return pickle.load(model_file)
def write_submission(predictions):
    """Write predictions to the ``prediction_path`` CSV file.

    The file gets an ``Id,SalaryNormalized`` header followed by one row per
    pairing of a validation-set ``Id`` with a flattened prediction value.

    Args:
        predictions: object exposing ``.flatten()`` (e.g. a numpy array) whose
            flattened values line up with the rows of the validation set.
            Pairing uses ``zip``, so output stops at the shorter of the two.
    """
    prediction_path = get_paths()["prediction_path"]
    # Load the validation ids before opening (and truncating) the output file,
    # so a failure to read the validation data does not destroy an existing
    # submission.
    valid = get_valid_df()
    # The context manager guarantees the rows are flushed and the file closed.
    with open(prediction_path, "w") as submission_file:
        writer = csv.writer(submission_file, lineterminator="\n")
        writer.writerow(("Id", "SalaryNormalized"))
        writer.writerows(zip(valid["Id"], predictions.flatten()))