diff --git a/README.md b/README.md
index c4cfa61..0af638f 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,11 @@ The assembly jar file includes also the [WebGraph](https://webgraph.di.unimi.it/
The Javadocs are created by `mvn javadoc:javadoc`. Then open the file `target/site/apidocs/index.html` in a browser.
+### Source Code Formatting
+
+Run `mvn spotless:check` to verify that the sources are formatted correctly, and `mvn spotless:apply` to reformat them in place; see the [Spotless Maven guide](https://github.com/diffplug/spotless/blob/main/plugin-maven/README.md) for details. Java formatting rules are defined in [eclipse-formatter.xml](eclipse-formatter.xml).
+
+
## Memory and Disk Requirements
Note that the webgraphs are usually multiple Gigabytes in size and require for processing
diff --git a/eclipse-formatter.xml b/eclipse-formatter.xml
new file mode 100644
index 0000000..80cca1c
--- /dev/null
+++ b/eclipse-formatter.xml
@@ -0,0 +1,404 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pom.xml b/pom.xml
index 2e7db53..260f7cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,5 +1,8 @@
-
+
+
4.0.0
org.commoncrawl
@@ -25,68 +28,6 @@
5.13.2
-
-
-
- src/main/resources
-
-
-
-
- maven-compiler-plugin
- 3.14.0
-
- ${java.version}
- ${java.version}
-
-
-
- maven-assembly-plugin
- 3.7.1
-
-
- jar-with-dependencies
-
- cc-webgraph-${project.version}
-
-
-
- package
-
- single
-
-
-
-
-
- maven-surefire-plugin
- 3.5.2
-
-
- org.apache.maven.plugins
- maven-enforcer-plugin
- 3.5.0
-
-
- enforce-maven
-
- enforce
-
-
-
-
- 3.6.3
-
-
-
-
-
-
-
-
-
-
-
@@ -248,4 +189,89 @@
+
+
+
+
+ src/main/resources
+
+
+
+
+ maven-compiler-plugin
+ 3.14.0
+
+ ${java.version}
+ ${java.version}
+
+
+
+ maven-assembly-plugin
+ 3.7.1
+
+
+ jar-with-dependencies
+
+ cc-webgraph-${project.version}
+
+
+
+
+ single
+
+ package
+
+
+
+
+ maven-surefire-plugin
+ 3.5.2
+
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+ 3.5.0
+
+
+ enforce-maven
+
+ enforce
+
+
+
+
+ 3.6.3
+
+
+
+
+
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+ 2.46.1
+
+
+
+
+ pom.xml
+
+
+ all
+ true
+ false
+ -1
+ recommended_2008_06
+
+
+
+
+ ${project.basedir}/eclipse-formatter.xml
+
+
+
+
+
+
diff --git a/src/main/java/org/commoncrawl/webgraph/CreatePreferenceVector.java b/src/main/java/org/commoncrawl/webgraph/CreatePreferenceVector.java
index a4b973a..3d55ee5 100644
--- a/src/main/java/org/commoncrawl/webgraph/CreatePreferenceVector.java
+++ b/src/main/java/org/commoncrawl/webgraph/CreatePreferenceVector.java
@@ -35,7 +35,6 @@ public class CreatePreferenceVector {
long recordsProcessed;
long preferenceNamesFound;
-
public CreatePreferenceVector(double defVal) {
defaultPreferenceValue = defVal;
}
diff --git a/src/main/java/org/commoncrawl/webgraph/HostToDomainGraph.java b/src/main/java/org/commoncrawl/webgraph/HostToDomainGraph.java
index a47b4be..1b60cf7 100644
--- a/src/main/java/org/commoncrawl/webgraph/HostToDomainGraph.java
+++ b/src/main/java/org/commoncrawl/webgraph/HostToDomainGraph.java
@@ -337,7 +337,10 @@ public String convertNode(String line) {
return null;
}
if (lastDomain != null && domain.equals(lastDomain.name)) {
- // short cut for the common case of many subsequent subdomains of the same domain
+ /*
+ * short cut for the common case of many subsequent subdomains of the same
+ * domain
+ */
lastDomain.add(id);
return null;
}
diff --git a/src/main/java/org/commoncrawl/webgraph/JoinSortRanks.java b/src/main/java/org/commoncrawl/webgraph/JoinSortRanks.java
index f829ccc..cc6bf22 100644
--- a/src/main/java/org/commoncrawl/webgraph/JoinSortRanks.java
+++ b/src/main/java/org/commoncrawl/webgraph/JoinSortRanks.java
@@ -97,7 +97,7 @@ private void assignRank(int[] ranks, IntComparator comp) {
indirectSortPerm[i] = i;
}
Arrays.parallelQuickSort(0, length, comp, this::swapIndirect);
- for (int i = 0; i < length; ) {
+ for (int i = 0; i < length;) {
ranks[indirectSortPerm[i]] = ++i;
}
indirectSortPerm = null;
@@ -139,7 +139,7 @@ public String addRanks(String line) {
long id = Long.parseLong(line.substring(0, sep));
// check whether new line is already contained
int end = line.lastIndexOf('\n');
- String revHost = line.substring(sep+1);
+ String revHost = line.substring(sep + 1);
float hcv = getHarmonicCentralityValue(id);
long hcr = getHarmonicCentralityRank(id);
double prv = getPageRankValue(id);
@@ -160,7 +160,6 @@ public String addRanks(String line) {
return sb.toString();
}
-
/**
* Implementation of {@link JoinSortRanks} for lists exceeding
* {@link Arrays#MAX_ARRAY_SIZE}.
@@ -229,7 +228,7 @@ private void assignRank(long[][] ranks, LongComparator comp) {
BigArrays.set(indirectSortPerm, i, i);
}
BigArrays.quickSort(0, length, comp, this::swapIndirect);
- for (long i = 0; i < length; ) {
+ for (long i = 0; i < length;) {
BigArrays.set(ranks, BigArrays.get(indirectSortPerm, i), ++i);
}
indirectSortPerm = null;
diff --git a/src/main/java/org/commoncrawl/webgraph/explore/Graph.java b/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
index fcb6214..c2e4cc1 100644
--- a/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
+++ b/src/main/java/org/commoncrawl/webgraph/explore/Graph.java
@@ -384,7 +384,6 @@ public static String getTopLevelDomain(String reversedDomainName) {
return reversedDomainName;
}
-
/** Intersection of two sorted lists */
public static long[] intersect(long[] a, long[] b) {
int m = a.length;
@@ -523,7 +522,9 @@ public void subgraphMetrics(long[] nodes) {
LOG.info("\toutlinks = {} (links from the subgraph to outer nodes)", clusterOutlinks);
LOG.info("\ttotal inlinks = {} (all inlinks)", totalInlinks);
LOG.info("\ttotal outlinks = {} (all outlinks)", totalOutlinks);
- LOG.info("\tnodes linked = {} (outer nodes linked from subgraph)", sharedSuccessors(nodes, 1, nodes.length).length);
- LOG.info("\tnodes linking = {} (outer nodes linking to subgraph)", sharedPredecessors(nodes, 1, nodes.length).length);
+ LOG.info("\tnodes linked = {} (outer nodes linked from subgraph)",
+ sharedSuccessors(nodes, 1, nodes.length).length);
+ LOG.info("\tnodes linking = {} (outer nodes linking to subgraph)",
+ sharedPredecessors(nodes, 1, nodes.length).length);
}
}
diff --git a/src/main/java/org/commoncrawl/webgraph/explore/GraphExplorer.java b/src/main/java/org/commoncrawl/webgraph/explore/GraphExplorer.java
index 1e091e6..a3f1654 100644
--- a/src/main/java/org/commoncrawl/webgraph/explore/GraphExplorer.java
+++ b/src/main/java/org/commoncrawl/webgraph/explore/GraphExplorer.java
@@ -189,15 +189,14 @@ public void sl(String vertexLabel) {
public long[] loadVerticesFromFile(String fileName) {
AtomicLong lines = new AtomicLong();
try (Stream in = Files.lines(Paths.get(fileName), StandardCharsets.UTF_8)) {
- long[] res = in.mapToLong(
- label -> {
- lines.incrementAndGet();
- long id = g.vertexLabelToId(label);
- if (id == -1) {
- LOG.debug("Vertex `{}` not found in graph.", label);
- }
- return id;
- }).filter(id -> id > -1).toArray();
+ long[] res = in.mapToLong(label -> {
+ lines.incrementAndGet();
+ long id = g.vertexLabelToId(label);
+ if (id == -1) {
+ LOG.debug("Vertex `{}` not found in graph.", label);
+ }
+ return id;
+ }).filter(id -> id > -1).toArray();
LOG.info("Loaded {} vertices of {} lines in {}.", res.length, lines, fileName);
return res;
} catch (IOException e) {
diff --git a/src/test/java/org/commoncrawl/webgraph/TestCountingMergedIntIterator.java b/src/test/java/org/commoncrawl/webgraph/TestCountingMergedIntIterator.java
index 3cd1ebf..a973b4a 100644
--- a/src/test/java/org/commoncrawl/webgraph/TestCountingMergedIntIterator.java
+++ b/src/test/java/org/commoncrawl/webgraph/TestCountingMergedIntIterator.java
@@ -26,16 +26,16 @@ void testSimple() {
CountingMergedIntIterator iter = new CountingMergedIntIterator(LazyIntIterators.EMPTY_ITERATOR);
assertFalse(iter.hasNext());
- int[][][] testArrays = { //
- {{0, 1}}, //
- {{0}, {1}}, //
- {{1}, {0}}, //
- {{1}, {0}, {}}, //
- {{1}, {0}, {}, {0}, {0}}, //
- {{1}, {0}, {}, {0}, {0, 1}}, //
+ int[][][] testArrays = { //
+ { { 0, 1 } }, //
+ { { 0 }, { 1 } }, //
+ { { 1 }, { 0 } }, //
+ { { 1 }, { 0 }, {} }, //
+ { { 1 }, { 0 }, {}, { 0 }, { 0 } }, //
+ { { 1 }, { 0 }, {}, { 0 }, { 0, 1 } }, //
// tests for input arrays with repeating numbers
- {{1, 1}, {0, 0}, {}, {0, 0}, {0, 0}}, //
- {{1, 1}, {0, 0}, {}, {0}, {0, 1}} //
+ { { 1, 1 }, { 0, 0 }, {}, { 0, 0 }, { 0, 0 } }, //
+ { { 1, 1 }, { 0, 0 }, {}, { 0 }, { 0, 1 } } //
};
for (int[][] tArrays : testArrays) {
@@ -48,7 +48,7 @@ void testSimple() {
int totalCount = 0;
iter = new CountingMergedIntIterator(tIters);
assertTrue(iter.hasNext());
-
+
assertEquals(0, iter.nextInt());
assertTrue(iter.getCount() > 0);
totalCount += iter.getCount();