-
Notifications
You must be signed in to change notification settings - Fork 1
/
stringsearchalgos.py
executable file
·148 lines (122 loc) · 3.38 KB
/
stringsearchalgos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
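'''
Basic string-searching algorithms: a naive scan, Knuth-Morris-Pratt (KMP)
and finite-automaton matching. Each search function returns the number of
occurrences of the pattern in the text, counting overlapping occurrences.
'''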
#naive algorithm
def naive(p, t):
    '''
    Count the occurrences of pattern p in text t by brute force.

    Every alignment of p against t is tried and compared character by
    character, so overlapping occurrences are all counted. A pattern
    longer than the text yields 0.
    '''
    width = len(p)
    hits = 0
    # One candidate alignment per start offset where p still fits in t.
    for start in range(len(t) - width + 1):
        if all(t[start + k] == p[k] for k in range(width)):
            hits += 1
    return hits
#KMP String Search
def KMPSearch(pat, txt):
    '''
    Count the occurrences of pat in txt with the Knuth-Morris-Pratt algorithm.

    Overlapping occurrences are counted. An empty pattern is reported as
    matching at every position of txt including the end, i.e.
    len(txt) + 1 times, which is the same answer naive() gives.
    '''
    M = len(pat)
    N = len(txt)
    if M == 0:
        # Without this guard the empty lps table and pat[0] lookups
        # raise IndexError.
        return N + 1

    # lps[k] = length of the longest proper prefix of pat[0..k]
    # that is also a suffix of pat[0..k]
    lps = [0] * M
    computeLPSArray(pat, M, lps)

    count = 0
    j = 0  # number of pattern characters currently matched
    for ch in txt:
        # On a mismatch fall back to the next shorter border of the
        # matched prefix instead of re-reading text characters.
        while j != 0 and pat[j] != ch:
            j = lps[j - 1]
        if pat[j] == ch:
            j += 1
        if j == M:
            count += 1
            # Keep the longest border so overlapping matches are found.
            j = lps[j - 1]
    return count
def computeLPSArray(pat, M, lps):
    '''
    Fill lps in place with the KMP "longest prefix suffix" table of pat.

    After the call, lps[i] (for i in 0..M-1) is the length of the longest
    proper prefix of pat[0..i] that is also a suffix of pat[0..i].

    pat -- the pattern
    M   -- number of leading characters of pat to process
    lps -- list with at least M entries; overwritten in place

    Returns None. When M is 0 (empty pattern) lps is left untouched.
    '''
    if M <= 0:
        return
    # A single character has no proper prefix; set it explicitly so the
    # result does not depend on how the caller initialised lps.
    lps[0] = 0
    length = 0  # length of the previous longest prefix suffix
    i = 1
    # the loop calculates lps[i] for i = 1 to M-1
    while i < M:
        if pat[i] == pat[length]:
            length += 1
            lps[i] = length
            i += 1
        elif length != 0:
            # Retry with the next shorter border, e.g. AAACAAAA at i = 7.
            # Note that i is not advanced here.
            length = lps[length - 1]
        else:
            lps[i] = 0
            i += 1
# Alphabet size of the finite automaton: the transition table built by
# computeTF has one column per character code in range(NO_OF_CHARS).
NO_OF_CHARS = 256
#Finite Automata string searching
def getNextState(pat, M, state, x):
    '''
    Calculate the automaton state reached from `state` on input x.

    pat   -- the pattern
    M     -- length of pat
    state -- current state, i.e. number of pattern characters matched
    x     -- character code (as returned by ord) of the next input

    Returns the length of the longest prefix of pat that is also a
    suffix of pat[0..state-1] followed by the character x.
    '''
    # If the character is the same as the next character in the
    # pattern, then simply advance.
    if state < M and x == ord(pat[state]):
        return state + 1

    # Try candidate prefix lengths from the largest down and stop at the
    # first prefix that is also a suffix of "pat[0..state-1]x".
    for ns in range(state, 0, -1):
        if ord(pat[ns - 1]) != x:
            continue
        # The first ns-1 characters of the prefix must equal the last
        # ns-1 characters already matched. Each candidate is compared
        # from scratch; carrying a comparison index over from a
        # previous candidate gives wrong transitions.
        if pat[:ns - 1] == pat[state - ns + 1:state]:
            return ns
    return 0
def computeTF(pat, M):
    '''
    Build the transition table of the finite automaton for pat.

    The result has M + 1 rows (one per state) and NO_OF_CHARS columns
    (one per character code); entry [state][x] is the value of
    getNextState(pat, M, state, x).
    '''
    return [
        [getNextState(pat, M, state, code) for code in range(NO_OF_CHARS)]
        for state in range(M + 1)
    ]
def FAsearch(pat, txt):
    '''
    Count the occurrences of pat in txt using a finite automaton.

    Overlapping occurrences are counted. An empty pattern is reported as
    matching at every position of txt including the end, i.e.
    len(txt) + 1 times, which is the same answer naive() gives.

    Every character c of txt must satisfy ord(c) < NO_OF_CHARS; the
    transition table has no column for larger codes and the lookup
    raises IndexError.
    '''
    M = len(pat)
    N = len(txt)
    if M == 0:
        # The automaton starts in its accepting state, which the scan
        # below would never count; handle the empty pattern directly.
        return N + 1

    TF = computeTF(pat, M)

    # Process txt over the automaton; reaching state M means the last
    # M characters read are exactly pat.
    count = 0
    state = 0
    for ch in txt:
        state = TF[state][ord(ch)]
        if state == M:
            count += 1
    return count
# Driver program to test above function
if __name__ == '__main__':
    # Run the finite-automaton search on a sample pattern and text and
    # print the number of occurrences it reports.
    pat = "ABABCABAB"
    txt = "ABABDABACDABABCABAB"
    print(FAsearch(pat, txt))