-
Notifications
You must be signed in to change notification settings - Fork 0
/
unigram_accuracy.py
73 lines (50 loc) · 1.83 KB
/
unigram_accuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Program to print the accuracy of a unigram tagger model
def word_and_tag(phrase):
    """Split a ``word/tag`` token at its last slash.

    Only the final ``/`` acts as the separator, so a word that itself
    contains slashes (for example ``1/2/cd``) is kept intact.

    Args:
        phrase: A single token of the form ``word/tag``.

    Returns:
        A two-element list ``[word, tag]``. ``tag`` is an empty string when
        the token ends with the slash.

    Raises:
        ValueError: If ``phrase`` has no ``/`` after its first character,
            i.e. there is no non-empty word to separate from a tag.
    """
    split_at = phrase.rfind("/")
    # A slash at index 0 is not accepted as a separator (it would leave an
    # empty word), and -1 means there is no slash at all. Fail with a clear
    # message instead of returning unbound names.
    if split_at <= 0:
        raise ValueError(
            "expected a 'word/tag' token, got {!r}".format(phrase)
        )
    return [phrase[:split_at], phrase[split_at + 1:]]
# Collect every distinct tag that occurs in brown.txt, in first-seen order.
# A tag's position in tag_list is used further down as its row/column index
# in tag_matrix, so the order has to stay stable.
tag_list = []
seen_tags = set()  # same contents as tag_list, but with O(1) membership tests
with open("brown.txt", "r") as file:
    for line in file:
        # line.split() is empty for blank lines, so they contribute nothing.
        for phrase in line.split():
            tag = word_and_tag(phrase)[1]
            if tag not in seen_tags:
                seen_tags.add(tag)
                tag_list.append(tag)
# Load the word -> tag mapping from uni_test_output.txt: on each non-blank
# line the first whitespace-separated field is the word and the second is its
# tag. If a word appears on several lines, the last line wins.
word_tag = {}
with open("uni_test_output.txt", "r") as file:
    for line in file:
        word_list = line.split()
        # Skip blank lines (such as a trailing empty line) instead of failing
        # with IndexError; the corpus-reading loops in this script skip blank
        # lines in the same way.
        if not word_list:
            continue
        word_tag[word_list[0]] = word_list[1]
# Square matrix of zero counts with one row and one column per entry of
# tag_list. Each row is built as a separate list so that incrementing a cell
# in one row never affects another row.
tag_matrix = [[0] * len(tag_list) for _ in tag_list]
# Fill the confusion matrix from brown-test.txt. For every token, `tag` is the
# tag written in the test file and `pre_tag` is the tag stored for that word
# in word_tag; the cell tag_matrix[index of pre_tag][index of tag] is
# incremented by one.
#
# Tag positions are looked up through a dict built once, instead of scanning
# tag_list with list.index() twice per token. tag_list holds each tag only
# once, so the dict gives the same indices as list.index().
# A word missing from word_tag, or a tag missing from tag_list, raises KeyError.
tag_index = {name: position for position, name in enumerate(tag_list)}
with open("brown-test.txt", "r") as file:
    for line in file:
        # line.split() is empty for blank lines, so they are skipped.
        for phrase in line.split():
            # Parse the token once and unpack both parts.
            word, tag = word_and_tag(phrase)
            pre_tag = word_tag[word]
            tag_matrix[tag_index[pre_tag]][tag_index[tag]] += 1
# Accuracy from the confusion matrix: the diagonal cells count the tokens whose
# two tags agree, so accuracy = (sum of diagonal cells) / (sum of all cells).
num = sum(tag_matrix[i][i] for i in range(len(tag_matrix)))
deno = sum(sum(row) for row in tag_matrix)
if deno == 0:
    # Nothing was counted (for example an empty test file); report that
    # instead of failing with ZeroDivisionError.
    print("The accuracy of the unigram tagger is undefined: no tokens were evaluated")
else:
    print("The accuracy of the unigram tagger is ", (num / deno) * 100, "%")