Skip to content

Commit a2dabd9

Browse files
committed
memory management is even faster now
1 parent f634986 commit a2dabd9

File tree

2 files changed

+85
-114
lines changed

2 files changed

+85
-114
lines changed

src/qrisp/alg_primitives/arithmetic/ripple_mult.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def q_int_mult(factor_1, factor_2, inpl_adder = fourier_adder, target_qf = None)
4848
cx(factor_1[0], s)
4949
for i in range(factor_1.size):
5050

51-
inpl_adder(factor_2, s[i:])
51+
inpl_adder(factor_2[:len(s)-i], s[i:])
5252

5353
if i != factor_1.size-1:
5454
pass

src/qrisp/permeability/qc_transformations/memory_management.py

Lines changed: 84 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -129,100 +129,103 @@ def topological_sort(G, prefer=None, delay=None, sub_sort=nx.topological_sort):
129129
delay = lambda x: False
130130

131131
G = G.copy()
132-
# Collect the prefered nodes
133-
prefered_nodes = []
134-
prefered_node_indices = {}
132+
135133

136134
delay_nodes = []
137-
delay_node_indices = {}
135+
prefered_nodes = []
138136

139-
for n in G.nodes():
140-
n.processed = False
137+
node_list = list(G.nodes())
138+
139+
for i in range(len(node_list)):
140+
n = node_list[i]
141141
if n.instr is None:
142142
continue
143-
144-
if prefer(n.instr):
145-
prefered_node_indices[n] = len(prefered_nodes)
146-
prefered_nodes.append(n)
147-
148-
if delay(n.instr):
149-
delay_node_indices[n] = len(delay_nodes)
150-
delay_nodes.append(n)
151-
152-
143+
elif prefer(n.instr):
144+
prefered_nodes.append(i)
145+
elif delay(n.instr):
146+
delay_nodes.append(i)
147+
148+
sprs_mat = nx.to_scipy_sparse_array(G, format="csr")
149+
150+
res = toposort_helper(
151+
sprs_mat.indptr,
152+
sprs_mat.indices.astype(np.int32),
153+
len(G),
154+
np.array(delay_nodes, dtype = np.int32),
155+
np.array(prefered_nodes, dtype = np.int32))
156+
157+
return [node_list[i] for i in res]
153158

154159

155-
# For large scales, finding the ancestors is a bottleneck. We therefore use a
156-
# jitted version
157-
if len(G) * len(prefered_nodes) > 1000:
158-
anc_lists = ancestors(G, prefered_nodes)
159-
else:
160-
anc_lists = []
161-
for i in range(len(prefered_nodes)):
162-
anc_lists.append(list(nx.ancestors(G, prefered_nodes[i])))
163160

164-
node_ancs = {
165-
prefered_nodes[i]: anc_lists[i] for i in range(len(prefered_nodes))
166-
}
161+
@njit(cache = True)
162+
def toposort_helper(indptr, indices, node_amount, delay_nodes, prefered_nodes):
163+
# This array returns a graph that reflects all ancestor relations
164+
# i.e. ancestor_graph[42] is True at all ancestors of node 42
165+
ancestor_graph = compute_all_ancestors(indptr, indices, node_amount)
167166

168-
# We sort the nodes in order to prevent non-deterministic compilation behavior
169-
# prefered_nodes.sort(key=lambda x: len(node_ancs[x]) + 1/hash(x.instr))
167+
n = prefered_nodes.size
168+
m = delay_nodes.size
170169

171-
# Determine the required delay nodes for each prefered nodes
170+
# This array will contain the ancestor relations between the
171+
# prefered/delay nodes
172+
dependency_matrix = np.zeros((n, m), dtype = np.int8)
173+
174+
# Fill with information from ancestor_graph
175+
for i in range(n):
176+
for j in range(m):
177+
if ancestor_graph[prefered_nodes[i], delay_nodes[j]]:
178+
dependency_matrix[i, j] = 1
172179

173-
# For this we set up a matrix with boolean entriesthat indicates which
174-
# delay nodes are required to execute a prefered node.
175-
dependency_matrix = np.zeros((len(prefered_nodes), len(delay_nodes)), dtype = np.int8)
180+
# This array will contain the result
181+
res = np.zeros(node_amount, dtype = np.int32)
176182

177-
# Fill the matrix
178-
for n in prefered_nodes:
179-
n_index = prefered_node_indices[n]
180-
for k in node_ancs[n]:
181-
if k.instr:
182-
if delay(k.instr):
183-
dependency_matrix[n_index, delay_node_indices[k]] = 1
183+
# This array tracks which nodes have not yet been processed.
184+
# It is initialized to all True because no nodes have been processed yet.
185+
remaining_nodes = np.ones(node_amount, dtype = np.int8)
184186

185-
# Generate linearization
186-
lin = []
187-
188-
while prefered_nodes:
189-
190-
# Find the node with least requirements
191-
required_delay_nodes = np.sum(dependency_matrix, axis = 1)
192-
prefered_node_index_array = np.array(list(map(lambda n : prefered_node_indices[n], prefered_nodes)), dtype = np.int32)
193-
min_node_index = np.argmin(required_delay_nodes[prefered_node_index_array])
194-
195-
node = prefered_nodes.pop(min_node_index)
196-
ancs = []
197-
198-
# Find the ancestors subgraph of nodes that have not been processed yet
199-
for n in node_ancs[node] + [node]:
200-
if n.processed:
201-
continue
202-
else:
203-
n.processed = True
204-
ancs.append(n)
205-
sub_graph = G.subgraph(ancs)
206-
207-
# Generate the linearization
208-
lin += list(sub_sort(sub_graph))
209-
210-
# Update the depedency matrix
211-
dependency_matrix = np.clip(dependency_matrix - dependency_matrix[prefered_node_indices[n], :], 0, 1)
212-
213-
# Linearize the remainder
214-
remainder = []
215-
for n in G.nodes():
216-
if n.processed:
217-
continue
218-
else:
219-
n.processed = True
220-
remainder.append(n)
221-
222-
# lin += list(sub_sort(G))
223-
lin += list(sub_sort(G.subgraph(remainder)))
187+
# This integer will contain the amount of nodes that have been processed
188+
node_counter = 0
189+
190+
if m != 0:
191+
for i in range(n):
192+
# For each prefer node, we compute how many delay nodes are required.
193+
required_delay_nodes = np.sum(dependency_matrix, axis = 1)
194+
195+
# We determine the prefer node that requires the least delay nodes
196+
min_node_index = np.argmin(required_delay_nodes)
197+
prefer_node = prefered_nodes[min_node_index]
198+
199+
# We determine the ancestor nodes of this node that have
200+
# not been processed yet
201+
to_be_processed = ancestor_graph[prefer_node,:] & remaining_nodes
202+
ancestor_indices = np.nonzero(to_be_processed)[0]
203+
204+
# We insert the nodes in the result array.
205+
# We can assume that the order of the nodes induced by their numbering
206+
# is already a topological ordering. Therefore inserting them in
207+
# order is also a topological sub sort.
208+
res[node_counter:node_counter+len(ancestor_indices)] = ancestor_indices
209+
node_counter += len(ancestor_indices)
210+
211+
# Mark the nodes as processed
212+
remaining_nodes[ancestor_indices] = 0
213+
214+
215+
# Update the dependency matrix: All delay nodes that have been processed
216+
# don't need to be considered again for all following iterations,
217+
# we therefore remove them from the other columns
218+
dependency_matrix = np.clip(dependency_matrix - dependency_matrix[min_node_index, :], 0, 1)
219+
220+
# Finally we set all nodes in the processed column to 1 so this column
221+
# is not processed again.
222+
dependency_matrix[min_node_index, :] = 1
224223

225-
return lin
224+
# Insert the remaining nodes
225+
res[node_counter:] = np.nonzero(remaining_nodes)[0]
226+
227+
# return the result
228+
return res
226229

227230

228231
@njit(cache=True)
@@ -250,36 +253,4 @@ def compute_all_ancestors(indptr, indices, node_amount):
250253
if in_degree[child] == 0:
251254
queue.append(child)
252255

253-
return ancestors
254-
255-
@njit(cache=True)
256-
def ancestors_jitted_wrapper(start_indices, indptr, indices, node_amount):
257-
all_ancestors = compute_all_ancestors(indptr, indices, node_amount)
258-
259-
res = [np.zeros(1, dtype=np.int64)] * len(start_indices)
260-
for i, start_index in enumerate(start_indices):
261-
res[i] = np.where(all_ancestors[start_index])[0]
262-
263-
return res
264-
265-
266-
def ancestors(dag, start_nodes):
267-
node_list = list(dag.nodes())
268-
269-
sprs_mat = nx.to_scipy_sparse_array(dag, format="csr")
270-
271-
node_inversion_dic = {node_list[i] : i for i in range(len(node_list))}
272-
start_indices = [node_inversion_dic[node] for node in start_nodes]
273-
274-
res_list_indices = ancestors_jitted_wrapper(
275-
np.array(start_indices).astype(np.int32),
276-
sprs_mat.indptr,
277-
sprs_mat.indices.astype(np.int32),
278-
len(dag),
279-
)
280-
281-
res_node_list = [
282-
[node_list[j] for j in anc_indices] for anc_indices in res_list_indices
283-
]
284-
285-
return res_node_list
256+
return ancestors

0 commit comments

Comments
 (0)