-
Notifications
You must be signed in to change notification settings - Fork 0
/
dantecounts.py
executable file
·116 lines (96 loc) · 3.33 KB
/
dantecounts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 21 17:05:17 2018
@author: moroa
"""
from dantepage import dantePage
import os,re
import urllib.parse
class danteCountsPage (dantePage):
def __init__ (self,episodeN=0,resetcache=0):
dantePage.__init__ (self,t= "Divina commedia contatore di parole",
h="Divina commedia contatore di parole")
dir_path = os.path.dirname(os.path.realpath(__file__))
textFile = open(dir_path+"/1012-0.txt","r")
self.lines = textFile.read().split("\n")
self.counts = {}
try :
self.url = os.environ['QUERY_STRING']
query = urllib.parse.parse_qs(self.url)
except :
query = list()
print('')
if 'e' in query :
self.episodeN = int(query['e'][0])
else :
self.episodeN = episodeN
if 'cs' in query :
self.casesens = query['cs'][0]
else :
self.casesens = 'off'
self.resetcache = 0
if 'resetcache' in query :
self.resetcache = 1
# #Test case
# self.lines = list()
# self.lines.append("aaa aaa Aaa -bca dea aaa Introibo")
# self.epbounds[0] = 0
def word_count(self,string):
if self.casesens == 'off':
string = string.lower()
words = re.split("\W+",string)
for word in words:
if word in self.counts and word != '' :
self.counts[word] += 1
else:
self.counts[word] = 1
return 1
def count(self,ep) :
if ep==0:
fromline = self.epbounds[0]
toline = self.epbounds[100]
else :
fromline = self.epbounds[ep-1]
toline = self.epbounds[ep]
for line in self.lines[fromline:toline-1]:
self.word_count(line)
wcsort = sorted(self.counts,key=self.counts.__getitem__,reverse=True)
return (wcsort)
def generate_body (self):
dir_path = os.path.dirname(os.path.realpath(__file__))+'/cache'
# try to get from cache
if not self.resetcache :
try :
textFile = open(dir_path+"/count-e"+str(self.episodeN)+"-cs"+str(self.casesens),"r")
return textFile.read()+"<p style='font-size:10px'>(cached)</p>"
except :
1
sortedLines = self.count(self.episodeN)
html = ""
html += self.printEpisodeList()
html+= "<div id='form'>\n"
html+= self.printForm()
html+= self.printCountForm()
html+= "</div>\n"
html += "<div id='text'> "
html += "<h2> Word counts"
if self.episodeN>0 :
html+= ": "+self.epnames[self.episodeN-1]+" "
html += "</h2>\n"
count = 1E10
for line in sortedLines:
thiscount = p.counts[line]
if thiscount <count :
count = thiscount
html+= "<h3>"+str(count)+"</h3>"
html+= "<a href='dantepage.py?cs="+str(self.casesens)+"&w="+line+"'>"+line+"</a> \n"
try:
textFile = open(dir_path+"/count-e"+str(self.episodeN)+"-cs"+str(self.casesens),"w")
textFile.write(html)
except :
1
return html
if __name__ == "__main__":
p = danteCountsPage(episodeN=0)
print(p.generate())