-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautomata.py
333 lines (311 loc) · 12.8 KB
/
automata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
from typing import Iterable
from prettytable import PrettyTable
from graphviz import Digraph
class Disjoint:
'''
并查集, 用于计算等价关系
'''
def __init__(self, elems={}):
self.parent = dict()
for elem in set(elems):
self.parent[elem] = elem
def add(self, elem):
self.parent[elem] = elem
def root(self, elem):
'''
查找某个元素的 root (路径压缩)
'''
while elem != self.parent[elem]:
self.parent[elem] = self.parent[self.parent[elem]]
elem = self.parent[elem]
return elem
def join(self, elem1, elem2):
'''
连接两个元素
'''
root1 = self.root(elem1)
root2 = self.root(elem2)
self.parent[root1] = root2
def issame(self, elem1, elem2):
'''
判断两个元素是否有相同的 root
'''
return self.root(elem1) == self.root(elem2)
def ofroot(self, elem):
'''
查询具有公共的某个 root 的所有元素
'''
return set(filter(lambda e : self.root(e) == elem, self.parent.keys()))
def allroots(self):
'''
查询该并查集内所有作为 root 的元素
'''
return set(map(lambda e : self.root(e), self.parent.keys()))
def reset(self, elem):
'''
重置某个元素的 parent
'''
self.parent[elem] = elem
class Automata:
'''
有穷自动机:
- 有穷状态集(对应有向图中的节点)
- 输入字母表(状态转移边的标注)
- 状态转移规则(对应有向图中的边)
- 初始状态
- 终止状态集
'''
def __init__(self, start, finish, trans):
'''
params:
- start: 初始状态, 可为整数或字符串
- finish: 终止状态集, 可以为列表、元组等
- trans: 转移规则, 由若干个三元组构成,每个三元组格式为:(src, dst, by) 表示从 src 输入 by 可以转移到 dst, 其中 by 为字母.
'''
self.states = set()
self.tokens = set()
self.finish = set(finish)
self.trans = dict()
self.start = start
for tran in trans:
src = tran[0]
dst = tran[1]
by = tran[2]
self.states.add(src) # 原始状态
self.states.add(dst) # 目标状态
if by:
self.tokens.add(by) # 转移字母, 用 '' 表示 epsilon
if not src in self.trans.keys():
self.trans[src] = dict()
if not by in self.trans[src].keys():
self.trans[src][by] = set()
self.trans[src][by].add(dst) # 创建转移边
# 检查转移图的合法性:
# 1. 起始状态必须在状态集中
# 2. 终止状态必须在状态集中
assert start in self.states
assert isinstance(finish, Iterable)
assert all(map(lambda x : x in self.states, self.finish))
def show(self):
'''
使用 Graphviz 库打印该自动机
'''
graph = Digraph('Automata')
for state in self.states:
if state in self.finish:
graph.node(name=str(state), color='black', shape='doublecircle')
else:
graph.node(name=str(state), color='black', shape='circle')
if state == self.start:
graph.node(name='start', shape='plain')
graph.edge(tail_name='start', head_name=str(state))
for src in self.trans.keys():
for by in self.trans[src].keys():
for dst in self.trans[src][by]:
graph.edge(tail_name=str(src), head_name=str(dst), label=str(by))
graph.view()
def print_trans_table(self):
'''
打印该自动机的转移矩阵
'''
tokens = sorted(self.tokens)
deterministic = self.is_deterministic()
field_names = tokens if deterministic else (tokens + [''])
pretty = PrettyTable(field_names=['.'] + field_names)
for src in self.trans.keys():
row = [src]
for token in tokens:
if not token in self.trans[src].keys():
row.append('')
continue
dst = self.trans[src][token]
if len(dst) > 1:
dst = str(dst)
else:
dst = list(dst)[0]
row.append(dst)
if not deterministic: # 非确定
dst = set()
for by in self.trans[src]:
if not by:
dst = dst.union(self.trans[src][by])
if len(dst) > 1:
dst = str(dst)
elif len(dst) == 1:
dst = list(dst)[0]
else:
dst = ''
row.append(dst)
pretty.add_row(row)
print(pretty)
def is_deterministic(self) -> bool:
'''
是否为确定的有穷自动机(DFA)
'''
# 检验标准:
# 1. 不能有 epsilon 作为输入的字母
# 2. 存在某状态对于某字母有多种后继状态
for state in self.trans.keys():
for by in self.trans[state].keys():
if not by:
return False
if len(self.trans[state][by]) > 1:
return False
return True
def solve_epsilon_closure(self, state):
assert state in self.states
closure = set({state})
while True:
done = True
for cstate in closure:
if not cstate in self.trans.keys():
return closure
for by in self.trans[cstate].keys():
if not by:
if len(self.trans[cstate][by].difference(closure)) > 0:
done = False
closure = closure.union(self.trans[cstate][by])
if done:
break
return closure
def to_deterministic(self):
'''
非确定有穷自动机 (NFA) 的确定化, 返回转化后的 DFA, 并输出转化过程的表格
'''
# 1. epsilon-闭包,eps-closure(I),其中 I 为状态集合
# 2. I(a) = closure(J), 其中 J 为 I 中每个状态出发经过标记为 a 的弧所到达的集合
tokens = sorted(self.tokens) # 按字母的顺序列状态转移
log_table = [] # 记录画表格的过程
# 按照 PPT 的做法进行 BFS
queue = []
visit_list = []
I_start = self.solve_epsilon_closure(self.start)
queue.append(I_start)
visit_list.append(I_start)
while len(queue) > 0:
I_now = queue.pop(0)
# 求 J 与 closure(J)
row = [I_now]
for token in tokens:
J_next = set()
# 对 I 中的任一个状态,令其先经过 token 再经过 epsilon
for state in I_now:
if (not state in self.trans.keys()) or (not token in self.trans[state].keys()):
continue
dsts = self.trans[state][token]
for dst in dsts:
J_next = J_next.union(self.solve_epsilon_closure(dst))
row.append(J_next) # 记录 I_a
if len(J_next) == 0 or J_next in visit_list:
continue
visit_list.append(J_next)
queue.append(J_next) # 将新状态插入队列
log_table.append(row)
# 输出过程的表格
pretty = PrettyTable(field_names=[''] + tokens)
for log in log_table:
pretty.add_row(list(map(lambda s : 'null' if len(s) == 0 else str(s), log)))
print(pretty)
# 构造新的自动机
nstates = dict() # 新的状态命名
for i, nstate in enumerate(visit_list):
print("rename {0} --> {1}".format(nstate, i + 1)) # 可以在这里调整新状态的命名,从 0 开始/从 1 开始/从 'a' 开始 ......
nstates[str(nstate)] = i + 1
nstart = nstates[str(I_start)]
nfinish = set()
for nstate in visit_list:
if len(self.finish.intersection(nstate)) > 0:
nfinish.add(nstates[str(nstate)])
ntrans = list()
for log in log_table:
for i in range(len(tokens)):
if len(log[i + 1]) > 0:
ntrans.append([nstates[str(log[0])], nstates[str(log[i + 1])], tokens[i]])
# print(ntrans)
return Automata(nstart, nfinish, ntrans)
def minify(self):
'''
确定有穷自动机 (DFA) 的最小化, 采用教材上的分割法
'''
assert self.is_deterministic() # 必须是确定的有穷自动机
# 首先用一个并查集表示分区,root 相同的状态位于一个分区
group_div = Disjoint()
for state in self.states:
group_div.add(state)
# 首先按终止状态和非终止状态分区
fin = list(self.finish)[0] # 任取一个终止状态
for fstate in self.finish:
group_div.join(fstate, fin)
if len(self.finish) != len(self.states): # 有非终止状态, 则初始分成两个区, 否则只有一个初始分区
nfin = list(self.states.difference(self.finish))[0] # 任取一个非终止状态
for state in self.states:
if not state in self.finish:
group_div.join(state, nfin)
round_count = 0
while True:
# 对每个区进行处理, 直到每个区内的状态均两两等价
groups = group_div.allroots()
# 输出分区状态
round_count += 1
print("Minify round {0} ......".format(round_count))
for state in self.states:
print("state {0} is in group {1}".format(state, group_div.root(state)))
done = True
for group in groups: # 对每个区进行处理
states = list(group_div.ofroot(group))
equals = Disjoint(states) # 描述等价关系的并查集
for i in range(len(states)):
for j in range(i + 1, len(states)):
state1 = states[i]
state2 = states[j]
if state1 == state2:
continue
# 对当前分区的任意两个状态, 每个状态的所有转移都必须到等价的状态
# 因为有些状态没有对应输入的转移边,所以:要么都不转移,要么都转移到等价状态 => 等价
eq = True
for token in self.tokens:
can1 = state1 in self.trans.keys() and token in self.trans[state1].keys()
can2 = state2 in self.trans.keys() and token in self.trans[state2].keys()
if not can1 and not can2:
continue
if can1 and can2:
dst1 = list(self.trans[state1][token])[0]
dst2 = list(self.trans[state2][token])[0]
adst1 = group_div.root(dst1)
adst2 = group_div.root(dst2)
if adst1 != adst2:
eq = False
break
else:
eq = False
break
if eq:
equals.join(state1, state2)
if len(equals.allroots()) > 1:
done = False
# 对当前区重新分区
for state in states:
group_div.reset(state)
for state in states:
group_div.join(state, equals.root(state))
if done:
break
# 按分区的种类重新编号
nstate = dict()
groups = group_div.allroots()
for i, group in enumerate(groups):
nstate[group] = i + 1
# 更新状态转移
ntrans = []
for src in self.trans.keys():
for by in self.trans[src].keys():
dst = list(self.trans[src][by])[0]
asrc = nstate[group_div.root(src)]
adst = nstate[group_div.root(dst)]
if not [asrc, adst, by] in ntrans:
ntrans.append([asrc, adst, by])
nstart = nstate[group_div.root(self.start)]
nfinish = set()
for fstate in self.finish:
nfinish.add(nstate[group_div.root(fstate)])
return Automata(nstart, nfinish, ntrans)