diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java index 8e390bbf..779dd839 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java @@ -240,11 +240,15 @@ public void cleanup() { } private void reconnectOrphanedNodes() { - // It's possible that reconnecting one node will result in disconnecting another, since we are maintaining - // the maxConnections invariant. So, we do a best effort of 5 loops. We claim the entry node as an - // already used connectionTarget so that we don't clutter its edge list. + // Set of nodes that have been used as connection targets. Since reconnection edges are + // usually worse (by distance and/or diversity) than the original ones, we use this + // to avoid reusing the same target node more than once. var connectionTargets = ConcurrentHashMap.newKeySet(); + // It's particularly important for the entry node to have high quality edges, so mark it + // as an already-used connectionTarget before we start. connectionTargets.add(graph.entry()); + // It's possible that reconnecting one node will result in disconnecting another, since we are maintaining + // the maxConnections invariant. So, we do a best effort of 5 loops. for (int i = 0; i < 5; i++) { // find all nodes reachable from the entry node var connectedNodes = new AtomicFixedBitSet(graph.getIdUpperBound());