Skip to content

Commit 8ac6444

Browse files
committed
Add bypass attribute to column spec
FeatureExtractor excludes any column that has the label or bypass attribute.
1 parent 7dce129 commit 8ac6444

File tree

3 files changed

+21
-12
lines changed

3 files changed

+21
-12
lines changed

featuretools/mkfeat/columnspec.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ def get_label_colname(self):
116116
return colinfo['name']
117117
return None
118118

119+
def get_skip_colnames(self):
    """Return the names of columns flagged as ``label`` or ``bypass``.

    Walks ``self.columns`` in order and collects the ``name`` of every
    column-info mapping whose ``label`` or ``bypass`` entry is present
    and truthy. A missing flag is treated the same as a falsy one.

    Returns:
        list: Matching column names in ``self.columns`` order; an empty
        list when no column carries either flag.
    """
    # dict.get() folds the "key present AND truthy" test into one lookup.
    return [
        colinfo['name']
        for colinfo in self.columns
        if colinfo.get('label') or colinfo.get('bypass')
    ]
125+
119126
def get_is_numerics(self):
120127
"""
121128
importance 결과 구성을 위하여 numeric 컬럼 여부 배열을 추출
@@ -124,7 +131,7 @@ def get_is_numerics(self):
124131
"""
125132
is_numerics = []
126133
for colinfo in self.columns:
127-
if 'label' in colinfo and colinfo['label']:
134+
if 'label' in colinfo and colinfo['label'] or 'bypass' in colinfo and colinfo['bypass']:
128135
is_numerics.append(False)
129136
else:
130137
is_numerics.append(self._is_numeric_type(colinfo['type']))

featuretools/mkfeat/feat_extractor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,9 @@ def extract_features(self, operators: list) -> Error:
9090
self._elapsed_time.mark()
9191

9292
self.feature_helper = FeatureHelper(features)
93-
df_label = self.es.get_df_label()
94-
if df_label is not None:
95-
self.feature_matrix = self.feature_matrix.merge(df_label, how='left', on=df_label.columns[0])
96-
self.feature_matrix.set_index(df_label.columns[0], inplace=True)
93+
df_skip = self.es.get_df_skip()
94+
if df_skip is not None:
95+
self.feature_matrix = self.feature_matrix.join(df_skip)
9796

9897
if self._proghandler is not None:
9998
self._proghandler(100)

featuretools/mkfeat/qufa_ES.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class QufaES(EntitySet):
1212
def __init__(self):
1313
super().__init__()
1414
self.target_entity_name = None
15-
self._df_label = None
15+
self._df_skip = None
1616

1717
def load_from_csv(self, path, colspec: ColumnSpec) -> Error:
1818
csv = QufaCsv(path, colspec)
@@ -21,10 +21,13 @@ def load_from_csv(self, path, colspec: ColumnSpec) -> Error:
2121
return data
2222

2323
colname_key = colspec.get_key_colname()
24-
colname_label = colspec.get_label_colname()
25-
if colname_label:
26-
self._df_label = data[[colname_key, colname_label]]
27-
data = data.drop(columns=colname_label)
24+
colnames_skip = colspec.get_skip_colnames()
25+
if colnames_skip:
26+
colnames_skip.insert(0, colname_key)
27+
self._df_skip = data[colnames_skip]
28+
self._df_skip.set_index(colname_key, inplace=True)
29+
colnames_skip.remove(colname_key)
30+
data = data.drop(columns=colnames_skip)
2831

2932
norminfos = normalize(data, colname_key)
3033

@@ -39,8 +42,8 @@ def load_from_csv(self, path, colspec: ColumnSpec) -> Error:
3942

4043
return Error.OK
4144

42-
def get_df_label(self):
43-
return self._df_label
45+
# Return the DataFrame of skipped (label/bypass) columns held in
# self._df_skip; it is None unless load_from_csv stored one.
def get_df_skip(self):
46+
return self._df_skip
4447

4548
def _search_owner_entity(self, varname):
4649
for et in self.entities:

0 commit comments

Comments
 (0)