# pal3.py -- forked from norvig/pytudes
# 170 lines (146 loc), 6.72 KB
from collections import Counter, deque
import re
class PhraseDict(dict):
    """A dictionary of {letters: phrase}, such as {'donaldeknuth': 'Donald E. Knuth'}.

    Besides the mapping itself, two Counters are built over the keys:
      .prefixes: {'pre': n} where n is the number of keys that start with 'pre'
      .suffixes: {'xes': n} where n is the number of keys that end with 'xes'
    """

    def __init__(self, phrases):
        """Build the mapping from an iterable of phrase strings (e.g. the lines of a file).

        Each phrase is stripped of surrounding whitespace and keyed by letters(phrase).
        When two phrases reduce to the same key, the later one wins.
        """
        super().__init__()
        for phrase in phrases:
            phrase = phrase.strip()
            key = letters(phrase)
            # A blank or punctuation-only line reduces to ''.  Storing it would make
            # the empty string look like a legal phrase to anything that tests
            # membership in this dict, so such lines are skipped.
            if key:
                self[key] = phrase
        self.prefixes = Counter(x for p in self for x in prefixes(p))
        self.suffixes = Counter(x for p in self for x in suffixes(p))
def prefixes(phrase):
    "All non-empty leading slices of phrase, shortest first (the last one is phrase itself)."
    result = []
    for end in range(len(phrase)):
        result.append(phrase[:end + 1])
    return result
def suffixes(phrase):
    "All non-empty trailing slices of phrase, shortest first (the last one is phrase itself)."
    size = len(phrase)
    result = []
    for start in range(size - 1, -1, -1):
        result.append(phrase[start:])
    return result
def letters(phrase, sub=re.compile(r'[\W]+').sub):
    """Return phrase in lowercase with every run of non-word characters removed.

    "Non-word" is the regex sense: spaces and punctuation are dropped, while
    letters, digits and underscores are kept.  The compiled pattern's .sub is
    bound once as a default argument so it is not rebuilt on each call.
    """
    squeezed = sub('', phrase)
    return squeezed.lower()
# Load the phrase dictionary once, at import time.  The `with` block closes the
# file as soon as PhraseDict has consumed its lines, instead of leaving the
# handle open for the life of the process.
with open('npdict.txt') as _npdict_file:
    DICT = PhraseDict(_npdict_file)
class Panama:
    """Panama represents a palindrome, or a state in searching for one.

    State:
      .left, .right  complete phrases chosen so far on each side (list / deque)
      .L, .R         the partial phrases still being built in the middle
      .set           all complete phrases currently in use (no phrase is used twice)
      .dict          the {letters: actual_phrase} mapping of allowable phrases
      .best          phrases of the longest palindrome recorded so far
      .Nshown        number of phrases in the last report that was printed
      .i             index of the current search step

    Invariant (asserted on construction): the letters of left + [L] are the
    reverse of the letters of [R] + right.
    """

    def __init__(self, left=['aman', 'aplan'], L='aca', R='', right=['acanal', 'panama'], dict=None):
        """Set up a search state and record it if it is already a palindrome.

        left, right: sequences of complete phrases (letters only); they are copied,
            so neither the caller's sequences nor the defaults are ever mutated.
        L, R: the partial phrases in the middle.
        dict: the phrase mapping to draw from; None (the default) means the
            module-level DICT, looked up when the constructor is called.
        Raises AssertionError if the two sides do not mirror each other.
        """
        left, right = list(left), list(right)
        assert ''.join(left + [L]) == ''.join([R] + right)[::-1]
        self.left = left                # list of complete phrases on left
        self.L = L                      # an incomplete phrase on left
        self.R = R                      # an incomplete phrase on right
        self.right = deque(right)       # deque of complete phrases on right
        self.dict = DICT if dict is None else dict  # a {letters: actual_phrase} mapping
        self.set = set(left + right)    # a set of all complete phrases in palindrome
        self.best = []                  # list of phrases in longest palindrome found
        self.Nshown = 0                 # the number of phrases shown in the previous printout
        self.i = 0                      # the number of steps taken in the search
        self.check()

    def __str__(self):
        "The best palindrome found so far, spelled with the original phrases."
        return self.original_phrases(self.best)

    def original_phrases(self, phrases):
        "Map each letters-only key in phrases back to its original text, joined by ', '."
        return ', '.join(self.dict[phrase] for phrase in phrases)

    def search(self, steps=10**5):
        """Depth-first search for palindromes, for at most `steps` iterations.

        From the current state, find all applicable actions.  Do the first one,
        and put on the stack reminders to undo it and try the others, but first
        search deeper from the result of the first action.  The stack holds two
        kinds of entries: lists of actions still to try, and UndoCommand strings.
        Returns None; results accumulate in self.best.
        """
        stack = [self.applicable_actions()]
        for self.i in range(steps):     # self.i doubles as the step counter used by report()
            if not stack:
                return
            command = stack.pop()
            if isinstance(command, UndoCommand):
                self.undo(command)
            elif command:               # a non-empty list of untried actions
                act = command.pop()
                self.do(act)
                self.check()
                # Popped in reverse: explore deeper first, then undo act, then
                # come back to the remaining alternatives in command.
                stack.extend([command, UndoCommand(act), self.applicable_actions()])

    def do(self, act):
        "Modify the current state by adding a letter, or finishing a phrase."
        if act == ',':                  # finish phrase on left
            self.set.add(self.L)
            self.left.append(self.L)
            self.L = ''
        elif act == ';':                # finish phrase on right
            self.set.add(self.R)
            self.right.appendleft(self.R)
            self.R = ''
        else:                           # add a letter to both sides, keeping them mirrored
            self.L = self.L + act
            self.R = act + self.R

    def undo(self, act):
        "Modify the current state by undoing an action that was previously done."
        if act == ',':                  # unfinish phrase on left
            assert self.L == ''
            self.L = self.left.pop()
            self.set.remove(self.L)
        elif act == ';':                # unfinish phrase on right
            assert self.R == ''
            self.R = self.right.popleft()
            self.set.remove(self.R)
        else:                           # remove a letter from both sides
            self.L = self.L[:-1]
            self.R = self.R[1:]

    def check(self):
        "Check to see if current state is a palindrome, and if so, record it and maybe print."
        if not self.is_palindrome():
            return
        N = len(self.left) + len(self.right)
        if N > len(self.best):
            self.best = self.left + list(self.right)
            # NOTE(review): the source's indentation was lost; the print test is placed
            # under the new-best branch because report() describes self.best -- confirm
            # against the upstream file.
            # Print when the best has grown by more than 1000 phrases since the last
            # printout, by more than 100 once past 14000, or on every new best past 14500.
            if N - self.Nshown > 1000 or (N > 14000 and N - self.Nshown > 100) or N > 14500:
                self.Nshown = N
                print(self.report())

    def report(self):
        "One-line summary of the best palindrome: phrase, word and letter counts, and the step."
        N = len(self.best)
        # Each phrase is one word plus one more for every space in its original text.
        nwords = N + sum(self.dict[p].count(' ') for p in self.best)
        nletters = sum(len(p) for p in self.best)
        return ('Pal: {:6,d} phrases, {:6,d} words, {:6,d} letters (at step {:,d})'
                .format(N, nwords, nletters, self.i+1))

    def applicable_actions(self):
        """List the actions that can be taken from the current state.

        ',' and ';' (finish the left / right partial phrase) come first, then the
        letters that can extend both partial phrases, in increasing order of
        score.  search() pops from the end of this list, so the highest-scoring
        letter is tried first and finishing a phrase is tried last.
        """
        L, R, D = self.L, self.R, self.dict
        actions = []
        if self.is_allowed(L):
            actions.append(',')
        if self.is_allowed(R):
            actions.append(';')
        # Score of a letter: (# keys starting with L+letter) * (# keys ending with letter+R).
        # Computed once per letter; a score of 0 means the letter is a dead end.
        scores = {A: D.prefixes[L+A] * D.suffixes[A+R] for A in alphabet}
        viable = [A for A in alphabet if scores[A] > 0]
        # sorted() is stable, so letters with equal scores stay in alphabet order.
        actions.extend(sorted(viable, key=scores.get))
        return actions

    def is_allowed(self, phrase):
        "A phrase may be finished if it is in the dictionary and not already used."
        return phrase in self.dict and phrase not in self.set

    def is_palindrome(self):
        """Is this a palindrome? (Does any extra .L or .R match the other side?)

        True when L is empty and R is a suffix of the last left phrase, or when
        R is empty and L is a prefix of the first right phrase.  A side with no
        phrases yet is treated as the empty string, so the completely empty
        state can be tested without an IndexError.
        """
        last_left = self.left[-1] if self.left else ''
        first_right = self.right[0] if self.right else ''
        return ((self.L == '' and last_left.endswith(self.R)) or
                (self.R == '' and first_right.startswith(self.L)))
# The letters that may be added, one at a time, to a partial phrase.
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Concatenate an iterable of strings.
cat = ''.join

# Type aliases for the two kinds of entries kept on the search stack:
# an undo reminder is a single action string, pending actions are a list.
UndoCommand, DoCommand = str, list
################ Unit Tests
def test1():
    "Unit tests for the string helpers and for the loaded phrase dictionary; returns 'ok'."
    # Helper functions.
    assert prefixes('hello') == ['h', 'he', 'hel', 'hell', 'hello']
    assert suffixes('hello') == ['o', 'lo', 'llo', 'ello', 'hello']
    expected_letters = [
        ('a man', 'aman'),
        ('an elk', 'anelk'),
        ('Mr. T', 'mrt'),
        ('Donald E. Knuth', 'donaldeknuth'),
    ]
    for raw, squeezed in expected_letters:
        assert letters(raw) == squeezed
    # Contents of the dictionary loaded from npdict.txt.
    assert len(DICT) == 125512
    for key in ('panama', 'aman'):
        assert key in DICT
    assert 'threemen' not in DICT
    assert DICT['acanal'] == 'a canal'
    return 'ok'
def test2():
    "Unit tests for Panama states: palindrome detection and printing; returns 'ok'."
    # The default state is the classic palindrome.
    default_state = Panama()
    assert default_state.is_palindrome()
    assert str(default_state) == 'a man, a plan, a canal, Panama'
    # Both partial phrases are non-empty, so this is not (yet) a palindrome.
    unfinished = Panama(left=['aman', 'aplan'], L='acadd', R='dd', right=['acanal', 'panama'])
    assert not unfinished.is_palindrome()
    # No partial phrases at all.
    tiny = Panama(left=['maya'], L='', R='', right=['ayam'])
    assert tiny.is_palindrome()
    assert str(tiny) == 'Maya, a yam'
    return 'ok'
if __name__ == '__main__':
    # Build the starting state, run the unit tests, then search for a million
    # steps and show the best palindrome found.
    p = Panama()
    test1()
    test2()
    p.search(10**6)
    print(p.report())
    print(p)    # print() applies str(), i.e. Panama.__str__