-
Notifications
You must be signed in to change notification settings - Fork 2
/
WordList.py
110 lines (93 loc) · 2.81 KB
/
WordList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random
import re
# A simpler version of the WordList program specially designed for Hangman.
class WordList:
    """Word list for Hangman, backed by a text file with one word per line.

    The words are held in memory in ``self.lines`` (whitespace-stripped, in
    file order) and the file named by ``self.wl_name`` is rewritten whenever
    a word is added.
    """

    def __init__(self, wl_name="wordlist.txt"):
        """Load the word list from the file *wl_name*.

        wl_name: Path of the word list file. Defaults to "wordlist.txt",
                 resolved against the current working directory.

        Raises whatever ``open`` raises if the file cannot be read.
        """
        self.wl_name = wl_name
        # The context manager guarantees the handle is closed after loading.
        with open(self.wl_name, "r") as myfile:
            self.lines = [line.strip() for line in myfile]

    def add_word(self, word):
        """Insert *word* into the list and rewrite the word list file.

        The word is placed in front of the first existing entry that compares
        greater than it, which keeps an already-sorted list sorted. If no such
        entry exists (the word sorts last, or the list is empty) the word is
        appended, so it is never silently dropped.
        """
        for i, existing in enumerate(self.lines):
            if word < existing:
                self.lines.insert(i, word)
                break
        else:
            # No entry is greater than the new word: it belongs at the end.
            self.lines.append(word)

        # Closing the file via "with" makes sure the new contents are flushed.
        with open(self.wl_name, "w") as myfile:
            myfile.writelines(line + "\n" for line in self.lines)

    #
    # Selects a random word out of the word list.
    #
    # regex: Only select words that match this regular expression. If none is given,
    #        select any word with length greater than 0.
    #
    def random(self, regex='.+'):
        """Return a randomly chosen word matching *regex*, or None.

        A random starting position is picked and the list is scanned from
        there, wrapping around at the end, until a word matches (using
        ``re.search``). None is returned when the list is empty or when no
        word matches.
        """
        total = len(self.lines)
        if total == 0:
            # random.randrange(0) would raise ValueError; treat as "no match".
            return None

        pattern = re.compile(regex)
        start = random.randrange(total)
        for offset in range(total):
            candidate = self.lines[(start + offset) % total]
            if pattern.search(candidate) is not None:
                return candidate
        return None

    # Takes a simplified regular expression and adds anchors to both ends. Then calls
    # find_matches().
    def find_words(self, regex):
        """Return every word that matches *regex* as a whole word.

        A leading '^' and a trailing '$' are added textually when missing.
        If *regex* is None, every non-empty word is returned.

        NOTE: because the anchors are plain string concatenation, a pattern
        with top-level alternation such as 'a|b' becomes '^a|b$'.
        """
        if regex is None:
            return self.find_matches('.+')
        if not regex.startswith('^'):
            regex = '^' + regex
        if not regex.endswith('$'):
            regex += '$'
        return self.find_matches(regex)

    # Returns a list of every word that matches regex.
    def find_matches(self, regex):
        """Return a list of every word for which ``re.search(regex, word)`` succeeds."""
        pattern = re.compile(regex)
        return [line for line in self.lines if pattern.search(line) is not None]

    @staticmethod
    def _ranked_letter_counts(words):
        """Count the letters 'a'-'z' in *words* and rank them by frequency.

        Returns a list of 26 ``(letter, count)`` pairs ordered by descending
        count. The sort is stable, so letters with equal counts stay in
        alphabetical order. Characters outside 'a'-'z' are ignored.
        """
        counts = [0] * 26
        base = ord('a')
        for word in words:
            for c in word:
                if 'a' <= c <= 'z':
                    counts[ord(c) - base] += 1

        # Negated count as key: descending order while keeping ties stable.
        ranked = sorted(range(26), key=lambda i: -counts[i])
        return [(chr(i + base), counts[i]) for i in ranked]

    def detailed_frequency(self, words=None):
        """Return a dict mapping each letter 'a'-'z' to its occurrence count.

        The dict always has 26 entries, inserted in order of descending count
        (ties alphabetical). If *words* is None, the loaded word list is used.
        """
        if words is None:
            words = self.lines
        return dict(self._ranked_letter_counts(words))

    # Calculates letter frequency based on the given list of words. If no list is given,
    # instead uses the built-in list of words.
    def letter_frequency(self, words=None):
        """Return the 26 letters 'a'-'z' as one string, most frequent first.

        Letters with equal counts appear in alphabetical order. If *words* is
        None, the loaded word list is used.
        """
        if words is None:
            words = self.lines
        return ''.join(letter for letter, _ in self._ranked_letter_counts(words))