# -*- coding: utf-8 -*-
import os
import json

from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA

from get_texts_from_path import get_docs, get_vecs
from models.utils import evaluation17
from models import ACModel, NathModel, ZlatkovaModel, KarasModel
from feature_extraction.computation import feature_extractors
from save_predictions import save_predictions, save_vectors
"""Configuration example:
```
{
"tasks":["style_change", "style_breach"],
"model": {
"name": "karas", # other options: "zlatkova", "nath", "pnb", "lof", "ac"
"hyperparams": {
... # list of hyperparams and their values
},
"trainable": false, # true if model is trainable
}
"use_vectors": false, # true if have collected document vectors
"features": {
"extractors": [], # if not empty, should specify feature groups
"selection": false, # if true, performs feature selection
"pca": false # if true, performs dimensionality reduction using PCA
},
"datasets": {
"train": "", # path to train dataset
"dev": "", # path to development dataset
"test": [] # paths to test datasets
},
"outputDir": "", # path to save predictions,
"resultDir": "" # path to save metrics,
"vectorsDir": "" # if use_vectors is False its a path to save a folder with computed vectors in
else a path that contain that filled folder
}
```
"""

methods = {
    "zlatkova": ZlatkovaModel,
    "nath": NathModel,
    "karas": KarasModel,
    "ac": ACModel
}


def feature_selection(features):
    selector = VarianceThreshold()  # selects all features with non-zero variance
    return lambda x: selector.fit_transform(features(x))


def pca(features):
    reducer = PCA()  # picks all principal components
    return lambda x: reducer.fit_transform(features(x))
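
# The two wrappers compose: when both flags are set in the config, each
# extractor effectively becomes pca(feature_selection(extractor)), i.e.
# extract features, drop zero-variance columns, then project onto the
# principal components.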


def process_results(test, task, name, results):
    print("test data: {},\ntask: {},\nmodel: {},\nprecision = {}, recall = {}, f1-score = {}, accuracy = {}"
          .format(test, task, name, results[0], results[1], results[2], results[3]))


def eval(config):
    # Resolve the extractor groups named in the config (all groups by default),
    # then wrap each one with feature selection and/or PCA as requested.
    names = config["features"]["extractors"] or list(feature_extractors.keys())
    features = [feature_extractors[name] for name in names]
    if config["features"]["selection"]:
        features = [feature_selection(feature) for feature in features]
    if config["features"]["pca"]:
        features = [pca(feature) for feature in features]

    method = methods[config["model"]["name"]]
    model = method(config["model"]["hyperparams"], features)
    if config["model"].get("trainable", False):
        model.train(get_docs(config["datasets"]["train"]), get_docs(config["datasets"]["dev"]))

    for test_dir in config["datasets"]["test"]:
        docs = get_docs(path=test_dir)
        texts = [(text, dictionary) for _, text, dictionary in docs]
        if config["model"]["name"] in ["ac", "karas"]:
            folder_name = "-".join(
                [os.path.split(test_dir)[-1], str(config["features"]["selection"]), str(config["features"]["pca"])])
            use_vectors = config["use_vectors"]
            if use_vectors:
                vecs_for_algorithm = get_vecs(path=test_dir, vecpath=os.path.join(config["vectorsDir"], folder_name))
                texts = [(text, vector, dictionary) for _, text, vector, dictionary in vecs_for_algorithm]
            predictions, vectors = model.test(texts, use_vectors)
            if not use_vectors:
                save_vectors(docs, vectors, config, folder_name)
        else:
            predictions = model.test(texts)
        save_predictions(docs, predictions, config, os.path.split(test_dir)[-1])
        results = evaluation17.main(docs, predictions, config["task"] == "style_breach", config,
                                    os.path.split(test_dir)[-1])
        process_results(test_dir, config["task"], config["model"]["name"], results)


if __name__ == "__main__":
    with open("config.json", "r", encoding="utf8") as config_json:
        config = json.load(config_json)
    eval(config)
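
# Usage: run `python eval.py` from a directory containing config.json;
# predictions and metrics are written to the configured output folders.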