55
66namespace CodeJam . Collections
77{
8- public abstract class SuffixTreeBase
9- {
8+ public abstract class SuffixTreeBase
9+ {
1010 /// <summary>Node alignment in Print output</summary>
1111 private const int Align = 6 ;
1212 /// <summary>Root node index</summary>
13- protected const int RootNodeIndex = 0 ;
13+ protected const int RootNodeIndex = 0 ;
1414
1515 /// <summary>Tree nodes</summary>
1616 private readonly List < Node > _nodes ;
@@ -21,27 +21,27 @@ public abstract class SuffixTreeBase
2121 /// <summary>Adds a new node</summary>
2222 /// <param name="node">A node to add</param>
2323 /// <returns>Index of the node</returns>
24- protected int AddNode ( Node node )
25- {
26- var index = _nodes . Count ;
24+ protected int AddNode ( Node node )
25+ {
26+ var index = _nodes . Count ;
2727 _nodes . Add ( node ) ;
28- return index ;
29- }
28+ return index ;
29+ }
3030
3131 /// <summary>Updates the node at the index</summary>
3232 /// <param name="index">The index to update</param>
3333 /// <param name="node">The new node value</param>
34- protected void UpdateNode ( int index , Node node ) => _nodes [ index ] = node ;
34+ protected void UpdateNode ( int index , Node node ) => _nodes [ index ] = node ;
3535
36- /// <summary>Gets a node at the index</summary>
36+ /// <summary>Gets a node at the index</summary>
3737 /// <param name="index">The index of the node</param>
3838 /// <returns>The node</returns>
39- protected Node GetNode ( int index ) => _nodes [ index ] ;
39+ protected Node GetNode ( int index ) => _nodes [ index ] ;
4040
4141 /// <summary>Number of nodes</summary>
42- protected int NodesCount => _nodes . Count ;
42+ protected int NodesCount => _nodes . Count ;
4343
44- /// <summary>Concatenated input strings</summary>
44+ /// <summary>Concatenated input strings</summary>
4545 protected string InternalData { get ; private set ; }
4646
4747 /// <summary>List of end positions of added strings inside the InternalData</summary>
@@ -75,73 +75,141 @@ public void Add([NotNull]string data)
7575 BuildFor ( begin , InternalData . Length ) ;
7676 }
7777
78+ /// <summary>Enumerates all suffixes in the suffix tree</summary>
79+ /// <remarks>May return suffixes with the same value if they are present in different source strings. The tree is walked lazily with an explicit stack of branch points, visiting child edges in index order</remarks>
80+ /// <returns>The enumeration of all suffixes</returns>
81+ [ PublicAPI ]
82+ public IEnumerable < Suffix > AllSuffixes ( )
83+ {
84+ if ( Root . IsLeaf ) // Empty tree
85+ {
86+ yield break ;
87+ }
88+
89+ var branchStack = new Stack < BranchPoint > ( ) ;
90+ var branchPoint = new BranchPoint { Node = Root , EdgeIndex = 0 } ;
91+ var length = 0 ; // Accumulated length of the edges on the path from the root down to the current edge
92+ for ( ; ; )
93+ {
94+ var edge = GetNode ( branchPoint . Node . Children [ branchPoint . EdgeIndex ] ) ;
95+ var edgeLength = edge . Length ;
96+ length += edgeLength ;
97+ if ( ! edge . IsTerminal )
98+ {
99+ branchPoint . Length = edgeLength ; // Remember the edge length so it can be subtracted again when returning to this level
100+ branchStack . Push ( branchPoint ) ;
101+ branchPoint = new BranchPoint { Node = edge , EdgeIndex = 0 } ;
102+ continue ;
103+ }
104+
105+ // We have descended to a terminal edge. Let's produce a suffix
106+ var end = edge . End ;
107+ var offset = end - length ; // The suffix starts the accumulated path length before the end of the terminal edge
108+ var sourceIndex = GetSourceIndexByEnd ( end ) ;
109+ yield return new Suffix ( InternalData , sourceIndex , offset , length ) ;
110+
111+ // Move to the next suffix branch
112+ for ( ; ; )
113+ {
114+ length -= edgeLength ; // Drop the length of the edge we are leaving
115+ var nextEdgeIndex = branchPoint . EdgeIndex + 1 ;
116+ if ( nextEdgeIndex < branchPoint . Node . Children . Count )
117+ {
118+ branchPoint . EdgeIndex = nextEdgeIndex ;
119+ break ;
120+ }
121+ // There are no more branches on the current level
122+ // Return to the previous level
123+ if ( branchStack . Count == 0 )
124+ {
125+ // no more branches to visit
126+ DebugCode . AssertState ( length == 0 , "Illegal final length. Check logic" ) ;
127+ yield break ;
128+ }
129+ branchPoint = branchStack . Pop ( ) ;
130+ edgeLength = branchPoint . Length ; // Length of the edge we previously descended through from this branch point
131+ }
132+ }
133+ }
134+
135+ /// <summary>Locates the source string index by the suffix end using a lower-bound search over EndPositions</summary>
136+ /// <param name="end">The suffix end; checked by DebugCode.AssertState to be equal to one of the EndPositions entries</param>
137+ /// <returns>The source string index (the position of the end inside EndPositions)</returns>
138+ private int GetSourceIndexByEnd ( int end )
139+ {
140+ var index = EndPositions . LowerBound ( end ) ;
141+ DebugCode . AssertState ( index < EndPositions . Count && EndPositions [ index ] == end
142+ , "Invalid source index computed. Check logic" ) ;
143+ return index ;
144+ }
145+
78146 /// <summary>Appends suffixes for the last added string</summary>
79- protected abstract void BuildFor ( int begin , int end ) ;
147+ protected abstract void BuildFor ( int begin , int end ) ;
80148
81149 /// <summary>Creates a comparer for nodes against a char</summary>
82150 /// <returns>The comparer</returns>
83- protected Func < int , char , int > GetComparer ( ) => ( index , c ) =>
84- {
85- var node = GetNode ( index ) ;
86- if ( node . Begin == node . End ) // no char always less than any char
87- {
88- return - 1 ;
89- }
151+ protected Func < int , char , int > GetComparer ( ) => ( index , c ) =>
152+ {
153+ var node = GetNode ( index ) ;
154+ if ( node . Begin == node . End ) // no char always less than any char
155+ {
156+ return - 1 ;
157+ }
90158 var firstChar = InternalData [ node . Begin ] ;
91- return firstChar - c ;
92- } ;
159+ return firstChar - c ;
160+ } ;
93161
94162 /// <summary>Prints the tree structure to the string for the debugging purposes</summary>
95163 /// <returns>The tree structure as a string</returns>
96164 [ Pure ]
97165 public string Print ( )
98- {
99- var sb = new StringBuilder ( ) ;
100- var currentIndex = RootNodeIndex ;
101- var stack = new List < ValueTuple < int , int > > ( ) ;
102- for ( ; ; )
103- {
104- PrintNodeWithPath ( sb , currentIndex , stack ) ;
105- var node = GetNode ( currentIndex ) ;
106- if ( node . Children != null )
107- {
108- stack . Add ( ValueTuple . Create ( currentIndex , node . Children . Count - 2 ) ) ;
109- currentIndex = node . Children [ node . Children . Count - 1 ] ;
166+ {
167+ var sb = new StringBuilder ( ) ;
168+ var currentIndex = RootNodeIndex ;
169+ var stack = new List < ValueTuple < int , int > > ( ) ;
170+ for ( ; ; )
171+ {
172+ PrintNodeWithPath ( sb , currentIndex , stack ) ;
173+ var node = GetNode ( currentIndex ) ;
174+ if ( node . Children != null )
175+ {
176+ stack . Add ( ValueTuple . Create ( currentIndex , node . Children . Count - 2 ) ) ;
177+ currentIndex = node . Children [ node . Children . Count - 1 ] ;
110178 continue ;
111- }
112- currentIndex = - 1 ;
113- while ( stack . Count > 0 )
114- {
179+ }
180+ currentIndex = - 1 ;
181+ while ( stack . Count > 0 )
182+ {
115183 var t = stack [ stack . Count - 1 ] ;
116184 stack . RemoveAt ( stack . Count - 1 ) ;
117185 node = GetNode ( t . Item1 ) ;
118186 var nextChild = t . Item2 ;
119- if ( nextChild >= 0 )
120- {
121- currentIndex = node . Children [ nextChild ] ;
187+ if ( nextChild >= 0 )
188+ {
189+ currentIndex = node . Children [ nextChild ] ;
122190 stack . Add ( ValueTuple . Create ( t . Item1 , nextChild - 1 ) ) ;
123- break ;
124- }
191+ break ;
192+ }
125193 }
126- if ( currentIndex == - 1 )
127- {
128- break ;
129- }
130- }
194+ if ( currentIndex == - 1 )
195+ {
196+ break ;
197+ }
198+ }
131199 return sb . ToString ( ) ;
132- }
200+ }
133201
134202 /// <summary>Prints a single node representation along with the path prefix</summary>
135203 /// <param name="sb">The builder to print to</param>
136204 /// <param name="nodeIndex">The index of the node</param>
137205 /// <param name="stack">The stack of nodes to process</param>
138- private void PrintNodeWithPath ( [ NotNull ] StringBuilder sb , int nodeIndex
206+ private void PrintNodeWithPath ( [ NotNull ] StringBuilder sb , int nodeIndex
139207 , [ NotNull ] IReadOnlyList < ValueTuple < int , int > > stack )
140- {
141- if ( stack . Count > 0 )
142- {
143- for ( var i = 0 ; i < stack . Count - 1 ; ++ i )
144- {
208+ {
209+ if ( stack . Count > 0 )
210+ {
211+ for ( var i = 0 ; i < stack . Count - 1 ; ++ i )
212+ {
145213 sb . Append ( stack [ i ] . Item2 >= 0 ? '|' : ' ' ) ;
146214 sb . Append ( ' ' , Align - 1 ) ;
147215 }
@@ -155,13 +223,13 @@ private void PrintNodeWithPath([NotNull] StringBuilder sb, int nodeIndex
155223 sb . Append ( '_' , Align - 1 ) ;
156224 }
157225 PrintNodeText ( sb , nodeIndex ) ;
158- }
226+ }
159227
160228 /// <summary>Prints a single node information</summary>
161229 /// <param name="sb">The builder to print to</param>
162230 /// <param name="nodeIndex">The node index</param>
163- protected virtual void PrintNodeText ( [ NotNull ] StringBuilder sb , int nodeIndex )
164- {
231+ protected virtual void PrintNodeText ( [ NotNull ] StringBuilder sb , int nodeIndex )
232+ {
165233 var n = GetNode ( nodeIndex ) ;
166234 sb . AppendLine ( $ "({ nodeIndex } , [{ n . Begin } -{ n . End } ), { InternalData . Substring ( n . Begin , n . Length ) } )") ;
167235 }
@@ -175,7 +243,7 @@ protected struct Node
175243 /// <param name="begin">An edge start offset</param>
176244 /// <param name="end">An edge end offset</param>
177245 /// <param name="terminal">Whether the edge terminates the string or not</param>
178- public Node ( int begin , int end , bool terminal ) : this ( begin , end , terminal , null ) { }
246+ public Node ( int begin , int end , bool terminal ) : this ( begin , end , terminal , null ) { }
179247
180248 /// <summary>Constructs a new node</summary>
181249 /// <param name="begin">An edge start offset</param>
@@ -206,5 +274,16 @@ public Node(int begin, int end, bool terminal, List<int> children)
206274 /// <summary>Length of the corresponding substring</summary>
207275 public int Length => End - Begin ;
208276 }
277+
278+ /// <summary>Branching point: a tree node together with the child edge currently being walked by AllSuffixes</summary>
279+ private class BranchPoint
280+ {
281+ /// <summary>The tree node</summary>
282+ public Node Node ;
283+ /// <summary>The index of the chosen edge in the Children list of the node</summary>
284+ public int EdgeIndex ;
285+ /// <summary>The length of the chosen edge, stored when the traversal descends through it</summary>
286+ public int Length ;
287+ }
209288 }
210289}
0 commit comments