-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluate_similarity.py
35 lines (30 loc) · 1.29 KB
/
evaluate_similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding: utf-8 -*-
"""
Simple example showing how to evaluate an embedding on word-similarity datasets.
"""
import logging
from six import iteritems
from web.datasets.similarity import fetch_MEN, fetch_WS353, fetch_SimLex999, fetch_SimVerb3500, fetch_SCWS, fetch_RG65
from web.embeddings import load_embedding
from web.evaluate import evaluate_similarity
# Configure logging: every record is prefixed with a clock time and its
# level name, and everything from DEBUG upwards is emitted.
logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.DEBUG,
    datefmt='%I:%M:%S',
)

# Fetch embedding (warning: it might take a few minutes).
# NOTE(review): the path is relative to the current working directory, so the
# script presumably has to be launched from the project root -- confirm.
fname = "./output/embeddings/best_model_dict_euclidean_200"
# NOTE(review): the exact meaning of the normalize / lower / clean_words flags
# is defined by web.embeddings.load_embedding, which is not visible here.
embeddings = load_embedding(
    fname,
    format="dict",
    normalize=True,
    lower=True,
    clean_words=False,
)
# Define tasks: map a human-readable benchmark name to the dataset object
# returned by the matching fetch_* helper. Every fetch_* call runs right here,
# while the dict is being built, in the order listed.
tasks = {
    # MEN, dev and test splits
    "MEN-dev": fetch_MEN(which="dev"),
    "MEN-test": fetch_MEN(which="test"),
    # SimVerb-3500, dev and test splits
    "SimVerb3500-dev": fetch_SimVerb3500(which="dev"),
    "SimVerb3500-test": fetch_SimVerb3500(which="test"),
    # WordSim-353: default selection plus the similarity / relatedness subsets
    "WS353": fetch_WS353(),
    "WS353-Sim": fetch_WS353(which="similarity"),
    "WS353-Rel": fetch_WS353(which="relatedness"),
    # Datasets fetched with their default arguments
    "SimLex999": fetch_SimLex999(),
    "SCWS": fetch_SCWS(),
    "RG": fetch_RG65(),
}
# Calculate results using the helper function: score the embedding on each
# task and print one report line per task.
# NOTE(review): data.X / data.y are presumably the word pairs and their
# human-assigned scores -- confirm against web.datasets.similarity.
report_template = "Spearman correlation of scores on {} {}"
for name, data in iteritems(tasks):
    score = evaluate_similarity(embeddings, data.X, data.y)
    print(report_template.format(name, score))