From 1566046a8a2982d28d0f68e86932132e6ddc5307 Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Wed, 1 Oct 2025 15:33:03 +0200
Subject: [PATCH] Bug fix counting top-level domains in node list

For the last block of domain names in reversed notation, sharing
the same TLD prefix, two counts are returned: the correct count
and count "1".
---
 src/main/java/org/commoncrawl/webgraph/explore/Graph.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/commoncrawl/webgraph/explore/Graph.java b/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
index 1b944e4..6fec31a 100644
--- a/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
+++ b/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
@@ -255,8 +255,11 @@ public Stream> topLevelDomainCounts(IntStream vertexIds) {
 					}
 					count++;
 				}
-				curr = next;
 				res.add(new SimpleEntry<>(tld, count));
+				curr = next;
+				if (!iter.hasNext()) {
+					break;
+				}
 			} while (curr > -1);
 		}
 		return res.stream().sorted(Collections.reverseOrder(Map.Entry.comparingByValue()));