-
Notifications
You must be signed in to change notification settings - Fork 6
/
tree.py
44 lines (37 loc) · 1.01 KB
/
tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# Tree-based feature selection.
#
# Fits an ExtraTreesClassifier on the feature matrix in features.csv, lets
# SelectFromModel pick the columns whose importance passes its default
# threshold, and writes the API-call names at the selected column indices to
# tree.txt (one name per line).
#
# NOTE(review): ExtraTreesClassifier() is constructed without a random_state,
# so the selected set can differ between runs.

# Feature matrix handed to the classifier.
# NOTE(review): presumably one row per line of training_file_index and one
# column per line of all_api_calls.txt, in the same order -- confirm against
# the script that generates features.csv.
df = pd.read_csv('features.csv')

# API-call names, one per line; list position is used below as the column
# index.  Only the line terminator is stripped: the earlier `line[:-1]` slice
# also removed the last character of the final name whenever the file did not
# end with a newline.  Blank lines are kept so positions stay aligned.
with open('mixed_dataset/all_api_calls.txt') as all_api_calls_file:
    all_api_calls = [line.rstrip('\n') for line in all_api_calls_file]

# Class labels, one per index line: the text before the first '-' is the
# sample name; names containing 'benign' get label 0, everything else 1.
with open('training_file_index', 'r') as indexed_file:
    target = [
        0 if 'benign' in row.split('-')[0] else 1
        for row in indexed_file
    ]
print(target)

clf = ExtraTreesClassifier()
clf = clf.fit(df, target)

# prefit=True: reuse the already-fitted forest instead of refitting it.
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(df)
print(X_new.shape)

# Integer column indices of the features that were kept.
selected_features = model.get_support(indices=True)
print(selected_features)

# Open the output only once there is something to write, so a failure in any
# earlier step no longer leaves behind a truncated, empty tree.txt.
with open('tree.txt', 'w') as f:
    f.writelines(all_api_calls[a] + "\n" for a in selected_features)