-
Notifications
You must be signed in to change notification settings - Fork 3
/
feature_entry.py
88 lines (73 loc) · 2.57 KB
/
feature_entry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import meta
class FeatureEntry:
    """Turn the raw text of one feature column into a numeric vector.

    How a value is encoded depends on ``tag``, which is compared against the
    ``meta.FEATURE_*`` constants. Categorical tags (``FEATURE_ENUM`` and
    ``FEATURE_LIST_ENUM``) first learn their categories through
    :meth:`add_sample`; :meth:`parse` then produces the encoded vector.

    An entry counts as missing when it is the literal string ``"NA"`` or
    contains nothing but whitespace.
    """

    def __init__(self, tag, required, default):
        """Store the feature configuration and start with no known categories.

        Args:
            tag: Feature kind, compared against the ``meta.FEATURE_*``
                constants.
            required: When truthy, :meth:`parse` returns ``None`` for a
                missing entry instead of encoding it.
            default: Stored as-is; no method of this class reads it.
        """
        self.tag = tag
        self.required = required
        self.default = default
        # Category label -> one-hot position, assigned in first-seen order.
        self.idx_map = {}

    @staticmethod
    def _is_missing(entry):
        """Return True when *entry* is ``"NA"`` or only whitespace."""
        return entry == "NA" or not entry.strip()

    def _one_hot(self, labels, missing):
        """Build the indicator vector for *labels* plus a trailing missing slot."""
        vector = [0] * (len(self.idx_map) + 1)
        if missing:
            vector[-1] = 1
            return vector
        for label in labels:
            vector[self.idx_map[label]] = 1
        return vector

    def add_sample(self, entry):
        """Register the categories found in *entry* for categorical tags.

        Missing entries are skipped, using the same test as :meth:`parse`, so
        a blank cell never becomes a category of its own. Tags other than
        ``FEATURE_ENUM`` and ``FEATURE_LIST_ENUM`` are ignored.

        Args:
            entry: Raw cell text.
        """
        if self._is_missing(entry):
            return
        if self.tag == meta.FEATURE_LIST_ENUM:
            labels = self.split_list(entry)
        elif self.tag == meta.FEATURE_ENUM:
            # Single-valued categories are keyed by the raw text, unnormalized.
            labels = [entry]
        else:
            return
        for label in labels:
            # The index is computed before insertion, so a new label gets the
            # next free position and a known label keeps its existing one.
            self.idx_map.setdefault(label, len(self.idx_map))

    def parse(self, entry):
        """Encode *entry* according to this feature's tag.

        Args:
            entry: Raw cell text.

        Returns:
            ``None`` when the feature is required and the entry is missing,
            or when the tag matches none of the handled constants. Otherwise
            a list:

            * ``FEATURE_NO``: an empty list.
            * ``FEATURE_ENUM`` / ``FEATURE_LIST_ENUM``: one slot per known
              category plus a final slot that is 1 only for a missing entry.
            * ``FEATURE_RANGE``, ``FEATURE_NUM``, ``FEATURE_BIN``,
              ``FEATURE_LIST_MAX``: ``[value, missing_flag]``, which is
              ``[0, 1]`` for a missing entry.

        Raises:
            KeyError: A categorical entry holds a label that was never passed
                to :meth:`add_sample`.
            ValueError: A numeric entry cannot be converted to a number.
        """
        missing = self._is_missing(entry)
        if self.required and missing:
            return None

        tag = self.tag
        if tag == meta.FEATURE_NO:
            return []
        if tag == meta.FEATURE_LIST_ENUM:
            return self._one_hot(self.split_list(entry), missing)
        if tag == meta.FEATURE_ENUM:
            return self._one_hot([entry], missing)
        if tag == meta.FEATURE_RANGE:
            if missing:
                return [0, 1]
            # Keep the text before the first '-' and then before the first
            # '+', e.g. "10-20" -> "10" and "65+" -> "65".
            lower = entry.split('-')[0].strip()
            lower = lower.split('+')[0].strip()
            # Flag 0 marks a present value, as in the other numeric branches;
            # a flag of 1 here would be indistinguishable from a missing one.
            return [float(lower), 0]
        if tag == meta.FEATURE_NUM:
            return [0, 1] if missing else [float(entry), 0]
        if tag == meta.FEATURE_BIN:
            return [0, 1] if missing else [int(entry.strip()), 0]
        if tag == meta.FEATURE_LIST_MAX:
            if missing:
                return [0, 1]
            # Take the last token after splitting on ';', then ' ', then ','.
            # NOTE(review): presumably the list is ordered so that the last
            # token is the maximum -- confirm against the data.
            last = entry.split(';')[-1].strip()
            last = last.split(' ')[-1].strip()
            last = last.split(',')[-1].strip()
            return [float(last), 0]
        return None

    def split_list(self, entry):
        """Split *entry* on ';' and return the stripped, lower-cased parts."""
        return [part.strip().lower() for part in entry.split(';')]