Skip to content

Commit 6e4b2bc

Browse files
committed
Updates and adds comments.
1 parent 6eadc55 commit 6e4b2bc

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

nx_cugraph/algorithms/link_prediction.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ def jaccard_coefficient(G, ebunch=None):
35 35

36 36
G = _to_undirected_graph(G)
37 37

38+
# FIXME: this zip() call appears to be the performance bottleneck for this
39+
# function. Is there a better way?
38 40
(u, v) = zip(*ebunch)
41+

39 42
try:
40 43
# Convert the ebunch lists to cupy arrays for passing to PLC, possibly
41 44
# mapping to integers if the Graph was renumbered.
@@ -47,12 +50,12 @@ def jaccard_coefficient(G, ebunch=None):
47 50
raise nx.NodeNotFound(f"Node {n} not in G.")
48 51
else:
49 52
# If G was not renumbered, then the ebunch nodes must be explicitly
50-
# checked (note: ebunch can be very large). plc.jaccard_coefficients()
51-
# will accept node IDs that are not in the graph and return a
52-
# coefficient of 0 for them.
53+
# checked (note: ebunch can be very large). If not done,
54+
# plc.jaccard_coefficients() will accept node IDs not in the graph and
55+
# return a coefficient of 0 for them, which is not compatible with NX.
53 56
#
54-
# FIXME: Is there a better way to do this? Should this be a utility
55-
# (or is it already)?
57+
# FIXME: Is there a better way to do this? Is there a utility to check
58+
# if a node ID is valid for the graph?
56 59
if not hasattr(G, "key_to_id") or G.key_to_id is None:
57 60
ebunch_nodes = cp.unique(cp.concatenate([u, v]))
58 61
graph_nodes = cp.unique(
@@ -70,8 +73,9 @@ def jaccard_coefficient(G, ebunch=None):
70 73

71 74
# Note that Jaccard similarity must run on a symmetric graph.
72 75
# FIXME: PLC will symmetrize the graph if told to, but the symmetrize flag
73-
# to _get_plc_graph() does other things (cast to 64bit, etc.). Can we let
74-
# PLC do the symmetrization if the symmetrize flag is set instead?
76+
# to _get_plc_graph() appears to symmetrize using cupy and does other
77+
# things (cast to 64bit, etc.). Can we let PLC do the symmetrization if the
78+
# symmetrize flag is set instead?
75 79
(u, v, p) = plc.jaccard_coefficients(
76 80
resource_handle=plc.ResourceHandle(),
77 81
graph=G._get_plc_graph(symmetrize=None),

0 commit comments

Comments
 (0)