@@ -129,100 +129,103 @@ def topological_sort(G, prefer=None, delay=None, sub_sort=nx.topological_sort):
129
129
delay = lambda x : False
130
130
131
131
G = G .copy ()
132
- # Collect the prefered nodes
133
- prefered_nodes = []
134
- prefered_node_indices = {}
132
+
135
133
136
134
delay_nodes = []
137
- delay_node_indices = {}
135
+ prefered_nodes = []
138
136
139
- for n in G .nodes ():
140
- n .processed = False
137
+ node_list = list (G .nodes ())
138
+
139
+ for i in range (len (node_list )):
140
+ n = node_list [i ]
141
141
if n .instr is None :
142
142
continue
143
-
144
- if prefer (n .instr ):
145
- prefered_node_indices [n ] = len (prefered_nodes )
146
- prefered_nodes .append (n )
147
-
148
- if delay (n .instr ):
149
- delay_node_indices [n ] = len (delay_nodes )
150
- delay_nodes .append (n )
151
-
152
-
143
+ elif prefer (n .instr ):
144
+ prefered_nodes .append (i )
145
+ elif delay (n .instr ):
146
+ delay_nodes .append (i )
147
+
148
+ sprs_mat = nx .to_scipy_sparse_array (G , format = "csr" )
149
+
150
+ res = toposort_helper (
151
+ sprs_mat .indptr ,
152
+ sprs_mat .indices .astype (np .int32 ),
153
+ len (G ),
154
+ np .array (delay_nodes , dtype = np .int32 ),
155
+ np .array (prefered_nodes , dtype = np .int32 ))
156
+
157
+ return [node_list [i ] for i in res ]
153
158
154
159
155
- # For large scales, finding the ancestors is a bottleneck. We therefore use a
156
- # jitted version
157
- if len (G ) * len (prefered_nodes ) > 1000 :
158
- anc_lists = ancestors (G , prefered_nodes )
159
- else :
160
- anc_lists = []
161
- for i in range (len (prefered_nodes )):
162
- anc_lists .append (list (nx .ancestors (G , prefered_nodes [i ])))
163
160
164
- node_ancs = {
165
- prefered_nodes [i ]: anc_lists [i ] for i in range (len (prefered_nodes ))
166
- }
161
+ @njit (cache = True )
162
+ def toposort_helper (indptr , indices , node_amount , delay_nodes , prefered_nodes ):
163
+ # This array encodes a graph that reflects all ancestor relations
164
+ # i.e. ancestor_graph[42] is True at all ancestors of node 42
165
+ ancestor_graph = compute_all_ancestors (indptr , indices , node_amount )
167
166
168
- # We sort the nodes in order to prevent non-deterministic compilation behavior
169
- # prefered_nodes.sort(key=lambda x: len(node_ancs[x]) + 1/hash(x.instr))
167
+ n = prefered_nodes . size
168
+ m = delay_nodes . size
170
169
171
- # Determine the required delay nodes for each prefered nodes
170
+ # This array will contain the ancestor relations between the
171
+ # prefered/delay nodes
172
+ dependency_matrix = np .zeros ((n , m ), dtype = np .int8 )
173
+
174
+ # Fill with information from ancestor_graph
175
+ for i in range (n ):
176
+ for j in range (m ):
177
+ if ancestor_graph [prefered_nodes [i ], delay_nodes [j ]]:
178
+ dependency_matrix [i , j ] = 1
172
179
173
- # For this we set up a matrix with boolean entriesthat indicates which
174
- # delay nodes are required to execute a prefered node.
175
- dependency_matrix = np .zeros ((len (prefered_nodes ), len (delay_nodes )), dtype = np .int8 )
180
+ # This array will contain the result
181
+ res = np .zeros (node_amount , dtype = np .int32 )
176
182
177
- # Fill the matrix
178
- for n in prefered_nodes :
179
- n_index = prefered_node_indices [n ]
180
- for k in node_ancs [n ]:
181
- if k .instr :
182
- if delay (k .instr ):
183
- dependency_matrix [n_index , delay_node_indices [k ]] = 1
183
+ # This array tracks which nodes have not yet been processed.
184
+ # It is initialized to all True because no nodes have been processed yet.
185
+ remaining_nodes = np .ones (node_amount , dtype = np .int8 )
184
186
185
- # Generate linearization
186
- lin = []
187
-
188
- while prefered_nodes :
189
-
190
- # Find the node with least requirements
191
- required_delay_nodes = np .sum (dependency_matrix , axis = 1 )
192
- prefered_node_index_array = np .array (list (map (lambda n : prefered_node_indices [n ], prefered_nodes )), dtype = np .int32 )
193
- min_node_index = np .argmin (required_delay_nodes [prefered_node_index_array ])
194
-
195
- node = prefered_nodes .pop (min_node_index )
196
- ancs = []
197
-
198
- # Find the ancestors subgraph of nodes that have not been processed yet
199
- for n in node_ancs [node ] + [node ]:
200
- if n .processed :
201
- continue
202
- else :
203
- n .processed = True
204
- ancs .append (n )
205
- sub_graph = G .subgraph (ancs )
206
-
207
- # Generate the linearization
208
- lin += list (sub_sort (sub_graph ))
209
-
210
- # Update the depedency matrix
211
- dependency_matrix = np .clip (dependency_matrix - dependency_matrix [prefered_node_indices [n ], :], 0 , 1 )
212
-
213
- # Linearize the remainder
214
- remainder = []
215
- for n in G .nodes ():
216
- if n .processed :
217
- continue
218
- else :
219
- n .processed = True
220
- remainder .append (n )
221
-
222
- # lin += list(sub_sort(G))
223
- lin += list (sub_sort (G .subgraph (remainder )))
187
+ # This integer will contain the amount of nodes that have been processed
188
+ node_counter = 0
189
+
190
+ if m != 0 :
191
+ for i in range (n ):
192
+ # For each prefer node we compute how many delay nodes are required.
193
+ required_delay_nodes = np .sum (dependency_matrix , axis = 1 )
194
+
195
+ # We determine the prefer node that requires the fewest delay nodes
196
+ min_node_index = np .argmin (required_delay_nodes )
197
+ prefer_node = prefered_nodes [min_node_index ]
198
+
199
+ # We determine the ancestor nodes of this node that have
200
+ # not been processed yet
201
+ to_be_processed = ancestor_graph [prefer_node ,:] & remaining_nodes
202
+ ancestor_indices = np .nonzero (to_be_processed )[0 ]
203
+
204
+ # We insert the nodes in the result array.
205
+ # We can assume that the order of the nodes induced by their numbering
206
+ # is already a topological ordering. Therefore inserting them in
207
+ # order is also a topological sub sort.
208
+ res [node_counter :node_counter + len (ancestor_indices )] = ancestor_indices
209
+ node_counter += len (ancestor_indices )
210
+
211
+ # Mark the nodes as processed
212
+ remaining_nodes [ancestor_indices ] = 0
213
+
214
+
215
+ # Update the dependency matrix: All delay nodes that have been processed
216
+ # don't need to be considered again for all following iterations,
217
+ # we therefore remove them from the other rows
218
+ dependency_matrix = np .clip (dependency_matrix - dependency_matrix [min_node_index , :], 0 , 1 )
219
+
220
+ # Finally we set all entries in the processed row to 1 so this row
221
+ # is not processed again.
222
+ dependency_matrix [min_node_index , :] = 1
224
223
225
- return lin
224
+ # Insert the remaining nodes
225
+ res [node_counter :] = np .nonzero (remaining_nodes )[0 ]
226
+
227
+ # return the result
228
+ return res
226
229
227
230
228
231
@njit (cache = True )
@@ -250,36 +253,4 @@ def compute_all_ancestors(indptr, indices, node_amount):
250
253
if in_degree [child ] == 0 :
251
254
queue .append (child )
252
255
253
- return ancestors
254
-
255
- @njit (cache = True )
256
- def ancestors_jitted_wrapper (start_indices , indptr , indices , node_amount ):
257
- all_ancestors = compute_all_ancestors (indptr , indices , node_amount )
258
-
259
- res = [np .zeros (1 , dtype = np .int64 )] * len (start_indices )
260
- for i , start_index in enumerate (start_indices ):
261
- res [i ] = np .where (all_ancestors [start_index ])[0 ]
262
-
263
- return res
264
-
265
-
266
- def ancestors (dag , start_nodes ):
267
- node_list = list (dag .nodes ())
268
-
269
- sprs_mat = nx .to_scipy_sparse_array (dag , format = "csr" )
270
-
271
- node_inversion_dic = {node_list [i ] : i for i in range (len (node_list ))}
272
- start_indices = [node_inversion_dic [node ] for node in start_nodes ]
273
-
274
- res_list_indices = ancestors_jitted_wrapper (
275
- np .array (start_indices ).astype (np .int32 ),
276
- sprs_mat .indptr ,
277
- sprs_mat .indices .astype (np .int32 ),
278
- len (dag ),
279
- )
280
-
281
- res_node_list = [
282
- [node_list [j ] for j in anc_indices ] for anc_indices in res_list_indices
283
- ]
284
-
285
- return res_node_list
256
+ return ancestors
0 commit comments