-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path10RegularExpressionMatching.cs
224 lines (182 loc) · 8.15 KB
/
10RegularExpressionMatching.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace _10RegularExpressionMatching
{
class Program
{
static void Main(string[] args)
{
}
/*
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
The function prototype should be:
bool isMatch(const char *s, const char *p)
Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true
* */
/*
* source code from blog:
* http://www.busy-beaver.me/leetcode-10-regular-expression-matching/
*
* Analysis from blog:
* 动态规划法。
dp[i+1][j+1]表示s[0..i]是否匹配p[0..j]。状态转移方程如下:
若p[j] != '*',dp[i+1][j+1] = (s[i] == p[j] || p[j] == '*') && dp[i][j]
若p[j] == '*'
匹配了0个字符。dp[i+1][j+1] = dp[i+1][j-1](跳过p[j]的*和*前面的p[j-1]。
匹配了1个字符。dp[i+1][j+1] = dp[i+1][j](跳过p[j]的*)
匹配了2个或2个以上的字符。dp[i+1][j+1] = dp[i][j+1] && (p[j-1] == s[i] || p[j-1] == '.')
(s[0..i-1]与p[0..j]匹配,并且p[j-1]与s[i]匹配)
*/
bool isMatch(string s, string p) {
int sLen = s.Length,
pLen = p.Length;
bool[][] dp = new bool[sLen + 1][]; // pLen + 1
for (int i = 0; i < sLen + 1; i++)
dp[i] = new bool[pLen + 1];
dp[0][0] = true;
for (int i = 1; i <= sLen; i++)
dp[i][0] = false;
for (int j = 1; j <= pLen; j++)
dp[0][j] = j > 1 && p[j-1] == '*' && dp[0][j-2];
for (int i = 0; i < sLen; i++) {
for (int j = 0; j < pLen; j++) {
if (p[j] != '*') {
dp[i+1][j+1] = (p[j] == '.' || s[i] == p[j]) && dp[i][j];
} else {
dp[i+1][j+1] = j > 0 && dp[i+1][j-1] || dp[i+1][j] || j > 0 && (p[j-1] == '.' || s[i] == p[j-1]) && dp[i][j+1];
}
}
}
return dp[sLen][pLen];
}
/*
* source code from blog:
* http://www.cnblogs.com/yuzhangcmu/p/4105529.html
* DP:
D[i][j]: 表示string s中取i长度的字串,string p中取j长度字串,进行匹配。
状态转移:
1. j >= 2 && P[j - 1] = *,这时,我们可以选择匹配s中的空字串,或匹配无限个。
k: 在s中匹配的字符的个数
所以转移式是:D[i][j] = D[i - k][j - 2] && isSame(s.charAt(i - k), p.charAt(j - 2)) (k: 1-i)
D[i - k][j - 2] (k = 0)
2. p最后一个字符不是*
那么首先,s中至少还要有一个字符,然后再匹配一个字符,以及上一级也要匹配即可。
D[i][j] = i >= 1
&& isSame(s.charAt(i - 1), p.charAt(j - 1))
&& D[i - 1][j - 1];
3. j = 0;
D[i][j] = i == 0; (p为空,则s也是要为空才可以匹配)
以下是运行时间(LEETCODE这道题目的数据太弱了... orz),看不出太大的区别。
* source code from blog:
* https://github.com/yuzhangcmu/LeetCode/blob/master/string/isMatch_2014_1228.java
*/
public static bool isMatch(String s, String p) {
if (s == null || p == null) {
return false;
}
int sLen = s.Length, pLen = p.Length;
bool[][] D = new bool[sLen + 1][]; // pLen + 1
for (int i = 0; i < sLen + 1; i++)
D[i] = new bool[pLen + 1];
// D[i][j]: i, j, the length of String s and String p.
for (int i = 0; i <= sLen; i++) {
for (int j = 0; j <= pLen; j++) {
if (j == 0) {
// when p is empty, then s should be empty.
D[i][j] = i == 0;
} else if (p[j - 1] == '*') {
/*
P has at least one node.
*/
// The last node in p is '*'
if (j < 2) {
// a error: there should be a character before *.
//return false;
}
// we can match 0 characters or match more characters.
for (int k = 0; k <= i; k++) {
// BUG 3: severe! Forget to deal with the empty string!!
if (k != 0 && !isSame(s[i - k], p[j - 2])) {
D[i][j] = false;
break;
}
if (D[i - k][j - 2]) {
D[i][j] = true;
break;
}
}
} else {
D[i][j] = i >= 1
&& isSame(s[i - 1], p[j - 1])
&& D[i - 1][j - 1];
}
}
}
return D[sLen][pLen];
}
public static bool isSame(char c, char p)
{
return p == '.' || c == p;
}
/*
* source code from blog:
* http://bangbingsyb.blogspot.ca/2014/11/leetcode-regular-expression-matching.html
*
* 关键在于如何处理这个'*'号。
状态:和Mininum Edit Distance这类题目一样。
dp[i][j]表示s[0:i-1]是否能和p[0:j-1]匹配。
递推公式:由于只有p中会含有regular expression,所以以p[j-1]来进行分类。
p[j-1] != '.' && p[j-1] != '*':dp[i][j] = dp[i-1][j-1] && (s[i-1] == p[j-1])
p[j-1] == '.':dp[i][j] = dp[i-1][j-1]
而关键的难点在于 p[j-1] = '*'。由于星号可以匹配0,1,乃至多个p[j-2]。
1. 匹配0个元素,即消去p[j-2],此时p[0: j-1] = p[0: j-3]
dp[i][j] = dp[i][j-2]
2. 匹配1个元素,此时p[0: j-1] = p[0: j-2]
dp[i][j] = dp[i][j-1]
3. 匹配多个元素,此时p[0: j-1] = { p[0: j-2], p[j-2], ... , p[j-2] }
dp[i][j] = dp[i-1][j] && (p[j-2]=='.' || s[i-2]==p[j-2])
*
* julia's comment:
* 1. Let me try to memorize this DP analysis and code.
* 2. Regular express is helpful, at least, I learn '.' rule this time.
*/
bool isMatch(string s, string p) {
int m = s.Length, n = p.Length;
bool[][] dp = new bool[m + 1][]; // pLen + 1
for (int i = 0; i < m + 1; i++)
dp[i] = new bool[n+ 1];
dp[0][0] = true;
for(int i=0; i<=m; i++) {
for(int j=1; j<=n; j++) {
if(p[j-1]!='.' && p[j-1]!='*') {
if(i>0 && s[i-1]==p[j-1] && dp[i-1][j-1])
dp[i][j] = true;
}
else if(p[j-1]=='.') {
if(i>0 && dp[i-1][j-1])
dp[i][j] = true;
}
else if(j>1) { //'*' cannot be the 1st element
if(dp[i][j-1] || dp[i][j-2]) // match 0 or 1 preceding element
dp[i][j] = true;
else if(i>0 && (p[j-2]==s[i-1] || p[j-2]=='.') && dp[i-1][j]) // match multiple preceding elements
dp[i][j] = true;
}
}
}
return dp[m][n];
}
}
}