-
Notifications
You must be signed in to change notification settings - Fork 0
/
my_openfe.py
161 lines (114 loc) · 5.07 KB
/
my_openfe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
本檔案包含了所有和「OpenFE」有關的函式。
詳細功能與輸入、輸出請見各函式的 docstring。
"""
from logging import getLogger
from openfe import OpenFE, get_candidate_features, transform
from os.path import join
from pandas import DataFrame
from pickle import dump
from typing import Any, Dict
logger = getLogger(__name__)
def get_and_save_candidate_features(params: Dict[str, Any]) -> None:
    """Build the OpenFE candidate features and pickle them to disk.

    The result of ``get_candidate_features`` is written to
    ``<directory>/<version>_candidate_features.pkl`` and is also stored
    back into ``params`` under the ``"candidate_features"`` key.

    Args:
        params (Dict[str, Any]): The parameters for getting candidate
            features. The keys read here are ``"numerical_features"``,
            ``"categorical_features"``, ``"ordinal_features"``,
            ``"directory"`` and ``"version"``. The dictionary is mutated:
            ``"candidate_features"`` is added (or overwritten).

    Raises:
        KeyError: If one of the keys listed above is missing.
        OSError: If the pickle file cannot be opened for writing.
    """
    logger.info(msg="Getting candidate features has been started.")
    candidate_features = get_candidate_features(
        numerical_features=params["numerical_features"],
        categorical_features=params["categorical_features"],
        ordinal_features=params["ordinal_features"])
    logger.info(msg="Getting candidate features has been finished.")

    file = join(params["directory"],
                f"{params['version']}_candidate_features.pkl")
    logger.info(msg="Saving candidate features has been started.")
    # The ``with`` statement closes the file on exit (also on error), so
    # no explicit ``close()`` call is needed.
    with open(file=file, mode="wb") as f:
        dump(obj=candidate_features, file=f)
    logger.info(msg="Saving candidate features has been finished.")

    # Hand the result back through ``params``.
    params["candidate_features"] = candidate_features
def get_and_save_features(params: Dict[str, Any]) -> None:
    """Fit OpenFE on the training data and pickle the results.

    Two files are written into ``params["directory"]``:

    * ``<version>_features.pkl`` -- the value returned by ``OpenFE.fit``.
    * ``<version>_openfe.pkl`` -- the fitted ``OpenFE`` object itself.

    The fitted features are NOT written back into ``params``; they are
    only persisted to disk.

    Args:
        params (Dict[str, Any]): The parameters for getting features.
            The keys read here are ``"train_data"`` (a mapping with
            ``"x"`` and ``"y"``), ``"task"``, ``"candidate_features"``,
            ``"categorical_features"``, ``"metric"``, ``"n_data_blocks"``,
            ``"min_candidate_features"``, ``"stage2_params"``,
            ``"n_jobs"``, ``"seed"``, ``"directory"`` and ``"version"``.

    Raises:
        KeyError: If one of the keys listed above is missing.
        OSError: If a pickle file cannot be opened for writing.
    """
    logger.info(msg="Getting features has been started.")
    ofe = OpenFE()
    features = ofe.fit(
        data=params["train_data"]["x"],
        label=params["train_data"]["y"],
        task=params["task"],
        candidate_features_list=params["candidate_features"],
        categorical_features=params["categorical_features"],
        metric=params["metric"],
        n_data_blocks=params["n_data_blocks"],
        min_candidate_features=params["min_candidate_features"],
        stage2_params=params["stage2_params"],
        n_jobs=params["n_jobs"],
        seed=params["seed"])
    logger.info(msg="Getting features has been finished.")

    file = join(params["directory"],
                f"{params['version']}_features.pkl")
    logger.info(msg="Saving features has been started.")
    # The ``with`` statement closes the file on exit (also on error), so
    # no explicit ``close()`` call is needed.
    with open(file=file, mode="wb") as f:
        dump(obj=features, file=f)
    logger.info(msg="Saving features has been finished.")

    file = join(params["directory"], f"{params['version']}_openfe.pkl")
    logger.info(msg="Saving openfe model has been started.")
    with open(file=file, mode="wb") as f:
        dump(obj=ofe, file=f)
    logger.info(msg="Saving openfe model has been finished.")
def get_and_save_x_data(params: Dict[str, Any]):
    """Apply the selected features to every data set and store the results.

    When ``params["features"]`` holds more than 30 entries, it is replaced
    in ``params`` by its first 30 entries. The train and unlabeled inputs
    are transformed together in one ``transform`` call; the validate input
    is transformed in a second call, paired with an empty ``DataFrame``.
    Each resulting frame is written as an HDF5 table (key ``"data"``) into
    ``params["directory"]``:

    * ``<version>_train_x.h5`` -- transformed train data.
    * ``<version>_unlabeled.h5`` -- transformed unlabeled data.
    * ``<version>_new_public_x.h5`` -- transformed validate data.

    Args:
        params (Dict[str, Any]): The parameters for getting x data. The
            keys read here are ``"features"``, ``"train_data"``,
            ``"unlabeled_data"``, ``"validate_data"`` (each data entry is
            indexed with ``"x"``), ``"n_jobs"``, ``"directory"`` and
            ``"version"``.
    """
    selected = params["features"]
    if len(selected) > 30:
        # Keep only the leading 30 features and record that in ``params``.
        selected = selected[:30]
        params["features"] = selected

    logger.info(msg="Transforming data has been started.")
    train_x, unlabeled_x = transform(
        X_train=params["train_data"]["x"],
        X_test=params["unlabeled_data"]["x"],
        new_features_list=selected,
        n_jobs=params["n_jobs"])
    # The second output (for the empty test frame) is not needed.
    validate_x, _ = transform(
        X_train=params["validate_data"]["x"],
        X_test=DataFrame(),
        new_features_list=selected,
        n_jobs=params["n_jobs"])
    logger.info(msg="Transforming data has been finished.")

    logger.info(msg="Saving transformed data has been started.")
    # (frame, file-name tail) pairs, written in this order.
    outputs = (
        (train_x, "_train_x.h5"),
        (unlabeled_x, "_unlabeled.h5"),
        (validate_x, "_new_public_x.h5"),
    )
    for frame, tail in outputs:
        target = join(params["directory"], f"{params['version']}{tail}")
        frame.to_hdf(path_or_buf=target,
                     key="data",
                     mode="w",
                     format="table",
                     index=False)
    logger.info(msg="Saving transformed data has been finished.")
def openfe_inference(params: Dict[str, Any]):
    """Run the OpenFE inference step.

    This only delegates to ``get_and_save_x_data`` with the same
    ``params`` dictionary.

    Args:
        params (Dict[str, Any]): The parameters for OpenFE inference;
            passed through unchanged to ``get_and_save_x_data``.
    """
    get_and_save_x_data(params)
def openfe_train(params: Dict[str, Any]):
    """Run the OpenFE training step.

    Calls ``get_and_save_candidate_features`` first and then
    ``get_and_save_features``, both with the same ``params`` dictionary.

    Args:
        params (Dict[str, Any]): The parameters for OpenFE training;
            passed through to both helper functions.
    """
    # Order matters: the candidate features are generated first, then
    # the features are fitted.
    get_and_save_candidate_features(params)
    get_and_save_features(params)