Skip to content

Commit 4519e15

Browse files
Lexeyandrewvk
authored andcommitted
Implemented Contains and ContainsSuffix
1 parent a0667c1 commit 4519e15

File tree

2 files changed

+186
-15
lines changed

2 files changed

+186
-15
lines changed

Experimental/src/Collections/SuffixTreeBase.cs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ public void Add([NotNull]string data)
7979
/// <remarks>May return suffixes with the same value if they are present in different source strings</remarks>
8080
/// <returns>The enumeration of all suffixes</returns>
8181
[PublicAPI]
82+
[Pure]
8283
public IEnumerable<Suffix> AllSuffixes()
8384
{
8485
if (Root.IsLeaf) // Empty tree
@@ -132,6 +133,111 @@ public IEnumerable<Suffix> AllSuffixes()
132133
}
133134
}
134135

136+
/// <summary>Checks whether the suffix tree contains the given substring</summary>
/// <param name="substring">The substring to locate</param>
/// <returns>true if the substring is found (the empty string is always reported as found), false otherwise</returns>
[PublicAPI]
[Pure]
public bool Contains([NotNull] string substring)
{
    Code.NotNull(substring, nameof(substring));

    // The empty string short-circuits to true; FindBranch is only called for a non-empty string.
    return substring == string.Empty || FindBranch(substring) != null;
}
151+
152+
/// <summary>Checks whether the suffix tree contains the given suffix</summary>
/// <param name="suffix">The suffix to locate</param>
/// <returns>true if the suffix is found (the empty string is always reported as found), false otherwise</returns>
[PublicAPI]
[Pure]
public bool ContainsSuffix([NotNull] string suffix)
{
    Code.NotNull(suffix, nameof(suffix));
    if (suffix == string.Empty)
    {
        return true;
    }

    var match = FindBranch(suffix);
    if (match == null)
    {
        return false;
    }

    var lastEdge = match.Item1;
    var matchedOnEdge = match.Item2;
    if (matchedOnEdge < lastEdge.Length)
    {
        // The string ends inside the edge: it is only a proper substring of some suffix.
        return false;
    }

    // The whole edge was consumed. It is a suffix when the edge is terminal,
    // or when the first child of the edge is a terminal edge of zero length.
    return lastEdge.IsLeaf || GetNode(lastEdge.Children[0]).Length == 0;
}
181+
182+
/// <summary>Walks down from the root along the edges spelled by the given string</summary>
/// <param name="s">The string to find (must be non-empty)</param>
/// <returns>
/// The last matched edge together with the number of chars matched on that edge,
/// or null if the string cannot be matched
/// </returns>
[Pure]
private Tuple<Node, int> FindBranch([NotNull] string s)
{
    DebugCode.AssertState(s.Length > 0, "The string length should be positive");
    var node = Root;
    var comparer = GetComparer();
    var matched = 0; // number of chars of s consumed so far
    while (true)
    {
        Node edge;
        if (FindEdge(node, comparer, s[matched], out edge) == -1)
        {
            return null;
        }

        var edgeLength = edge.Length;
        var remaining = s.Length - matched;
        var span = remaining < edgeLength ? remaining : edgeLength;

        // FindEdge has already verified the first char of the edge, so only the tail is compared here.
        var tailDiffers = span > 1
            && string.CompareOrdinal(s, matched + 1, InternalData, edge.Begin + 1, span - 1) != 0;
        if (tailDiffers)
        {
            return null;
        }

        matched += span;
        if (matched == s.Length)
        {
            return Tuple.Create(edge, span);
        }

        // The string is not exhausted, so the whole edge must have been consumed; descend one level.
        DebugCode.AssertState(span == edgeLength, "Invalid compare length. Check logic");
        node = edge;
    }
}
217+
218+
/// <summary>Finds the edge leaving the given node that starts with the given char</summary>
/// <param name="node">The node to search in</param>
/// <param name="comparer">The comparer passed to the lower-bound search over the node's children</param>
/// <param name="c">The char to find</param>
/// <param name="edge">
/// The edge found; default(Node) when the node is a leaf or the search runs past the last child
/// </param>
/// <returns>The index of the edge or -1 if there is no edge starting with the given char</returns>
[Pure]
private int FindEdge(Node node, [NotNull] Func<int, char, int> comparer, char c, out Node edge)
{
    edge = default(Node);
    if (node.IsLeaf)
    {
        return -1;
    }

    var children = node.Children;
    var position = children.LowerBound(c, comparer);
    if (position == children.Count)
    {
        return -1;
    }

    // The lower bound only locates a candidate; confirm that it really starts with the requested char.
    edge = GetNode(children[position]);
    if (edge.Length > 0 && InternalData[edge.Begin] == c)
    {
        return position;
    }

    return -1;
}
240+
135241
/// <summary>Locates the source string index by the suffix end</summary>
136242
/// <param name="end">The suffix end</param>
137243
/// <returns>The source string index</returns>

Experimental/tests/Collections/SuffixTreeTest.cs

Lines changed: 80 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,36 +60,101 @@ public void Test15RandomMultiple()
6060

6161
[Test]
public void Test16AllSuffixes()
{
    const int length = 50;

    // Scenario 1: from one to five purely random strings.
    for (var count = 1; count <= 5; count++)
    {
        var randomStrings = Enumerable.Range(0, count).Select(_ => MakeRandomString(length));
        TestAllSuffixes(randomStrings.ToArray());
    }

    // Scenario 2: zero to three random strings combined with the same string passed twice
    // (intended to guarantee duplicates; relies on the project's Union(params) helper - TODO confirm its semantics).
    var s = MakeRandomString(length);
    for (var randomCount = 0; randomCount <= 3; randomCount++)
    {
        var withDuplicates = Enumerable.Range(0, randomCount)
            .Select(_ => MakeRandomString(length))
            .Union(s, s);
        TestAllSuffixes(withDuplicates.ToArray());
    }
}
81+
82+
[Test]
public void Test17Contains()
{
    const int length = 50;
    const string notPresent = "@";

    for (var count = 1; count <= 5; count++)
    {
        var strings = Enumerable.Range(0, count)
            .Select(_ => MakeRandomString(length))
            .ToArray();

        var st = new SuffixTree();
        var suffixes = new HashSet<string>();
        var properSubstrings = new HashSet<string>();
        foreach (var source in strings)
        {
            st.Add(source);
            for (var start = 0; start < source.Length; start++)
            {
                var tail = source.Substring(start);
                suffixes.Add(tail);
                if (tail.Length != 1)
                {
                    // Dropping the last char yields a substring that is not this suffix itself.
                    properSubstrings.Add(tail.Substring(0, tail.Length - 1));
                }
            }
        }

        // A truncated suffix may still coincide with another suffix; keep only the true non-suffixes.
        properSubstrings.ExceptWith(suffixes);
        st.Compact();

        Assert.That(st.Contains(string.Empty));
        Assert.That(st.ContainsSuffix(string.Empty));
        Assert.That(!st.Contains(notPresent));
        Assert.That(!st.ContainsSuffix(notPresent));

        foreach (var expectedSuffix in suffixes)
        {
            Assert.That(st.Contains(expectedSuffix));
            Assert.That(st.ContainsSuffix(expectedSuffix));
        }

        foreach (var properSubstring in properSubstrings)
        {
            Assert.That(st.Contains(properSubstring));
            Assert.That(!st.ContainsSuffix(properSubstring));

            // Injecting the marker at every position must produce a string the tree does not contain.
            for (var position = 0; position <= properSubstring.Length; position++)
            {
                var notSubstring = properSubstring.Insert(position, notPresent);
                Assert.That(!st.Contains(notSubstring));
                Assert.That(!st.ContainsSuffix(notSubstring));
            }
        }
    }
}
132+
133+
// Builds a suffix tree from the given strings and checks AllSuffixes() against a brute-force enumeration:
// both the ordered suffix values and, per value, the sorted list of source string indexes.
private static void TestAllSuffixes(string[] strings)
{
    var expectedSuffixes = new List<string>();
    var expectedCounts = new LazyDictionary<string, List<int>>(_ => new List<int>());
    var st = new SuffixTree();

    var sourceIndex = 0;
    foreach (var source in strings)
    {
        st.Add(source);
        for (var start = 0; start < source.Length; start++)
        {
            var tail = source.Substring(start);
            expectedSuffixes.Add(tail);
            expectedCounts[tail].Add(sourceIndex);
        }
        sourceIndex++;
    }

    st.Compact();
    expectedSuffixes.Sort();

    var actual = st.AllSuffixes().ToList();
    var actualValues = actual.Select(x => x.Value).ToList();
    Assert.That(actualValues, Is.EqualTo(expectedSuffixes));

    // Group the reported suffixes by value and compare the source indexes of each group.
    var sourcesByValue = actual
        .GroupBy(x => x.Value)
        .ToDictionary(g => g.Key, g => g.Select(x => x.SourceIndex).OrderBy(index => index).ToList());
    foreach (var entry in sourcesByValue)
    {
        Assert.That(entry.Value, Is.EqualTo(expectedCounts[entry.Key]));
    }
}
95160

0 commit comments

Comments
 (0)