From ab9bed3a1620485908a722ae4e44a01daee4baf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20D=C3=BCsing?= Date: Tue, 24 Sep 2024 11:00:27 +0200 Subject: [PATCH 1/3] Some performance and stability improvements to iterator --- .../org/tudo/sse/model/ArtifactIdent.java | 24 +++++-- src/main/java/org/tudo/sse/utils/GAUtils.java | 14 ++++ .../org/tudo/sse/utils/IndexIterator.java | 68 ++++++++++++------- src/test/resources/PomInputs.json | 4 +- 4 files changed, 81 insertions(+), 29 deletions(-) diff --git a/src/main/java/org/tudo/sse/model/ArtifactIdent.java b/src/main/java/org/tudo/sse/model/ArtifactIdent.java index c7b9861..99335dc 100644 --- a/src/main/java/org/tudo/sse/model/ArtifactIdent.java +++ b/src/main/java/org/tudo/sse/model/ArtifactIdent.java @@ -12,6 +12,8 @@ * As well as methods for retrieving different file types from the maven central repository. */ public class ArtifactIdent { + + public static final String CENTRAL_REPOSITORY_URL = "https://repo1.maven.org/maven2/"; /** * The group section of the identifier. */ @@ -27,6 +29,11 @@ public class ArtifactIdent { */ private String GA; + /** + * The GAV triple of this artifact + */ + private String GAV; + /** * The version of the artifact. */ @@ -41,9 +48,8 @@ public class ArtifactIdent { public ArtifactIdent(String groupID, String artifactID, String version) { this.artifactID = artifactID; this.groupID = groupID; - this.GA = groupID + ":" + artifactID; this.version = version; - this.repository = "https://repo1.maven.org/maven2/"; + this.repository = CENTRAL_REPOSITORY_URL; } public ArtifactIdent(ArtifactIdent toCopy) { @@ -51,7 +57,6 @@ public ArtifactIdent(ArtifactIdent toCopy) { this.artifactID = toCopy.artifactID; this.version = toCopy.version; this.repository = toCopy.repository; - this.GA = toCopy.getGA(); } /** @@ -69,6 +74,7 @@ public String getGroupID() { public void setGroupID(String groupID) { this.groupID = groupID; this.GA = this.groupID + ":" + this.artifactID; + this.GAV = groupID + ":" + artifactID + ":" + version; } /** @@ -86,6 +92,7 @@ public String getArtifactID() { public void setArtifactID(String artifactID) { this.artifactID = artifactID; this.GA = this.groupID + ":" + this.artifactID; + this.GAV = groupID + ":" + artifactID + ":" + version; } /** @@ -93,6 +100,10 @@ public void setArtifactID(String artifactID) { * @return GA tuple separated by colon */ public String getGA(){ + if(this.GA == null){ + this.GA = this.groupID + ":" + this.artifactID; + } + return this.GA; } /** @@ -109,6 +120,7 @@ public String getVersion() { */ public void setVersion(String version) { this.version = version; + this.GAV = groupID + ":" + artifactID + ":" + version; } /** @@ -132,7 +144,11 @@ public void setRepository(String repository) { * @return full g:a:v value */ public String getCoordinates() { - return groupID + ":" + artifactID + ":" + version; + + if(this.GAV == null){ + this.GAV = groupID + ":" + artifactID + ":" + version; + } + return this.GAV; } /** diff --git a/src/main/java/org/tudo/sse/utils/GAUtils.java b/src/main/java/org/tudo/sse/utils/GAUtils.java index 744387e..d004024 100644 --- a/src/main/java/org/tudo/sse/utils/GAUtils.java +++ b/src/main/java/org/tudo/sse/utils/GAUtils.java @@ -49,6 +49,20 @@ public static List retrieveAllVersions(String groupId, String artifact } } + public static List getReleasesFromMetadata(ArtifactIdent identifier){ + try{ + Metadata meta = getVersions(identifier.getGroupID(), identifier.getArtifactID()); + + if(meta.getVersioning() == null){ + throw new RuntimeException("Invalid versioning in metadata: null"); + } + + return meta.getVersioning().getVersions(); + } catch(FileNotFoundException | IOException | XmlPullParserException x){ + throw new RuntimeException(x); + } + } + public static Artifact getLastModifiedVersion(String groupId, String artifactId) throws PomResolutionException { try { Metadata meta = getVersions(groupId, artifactId); diff --git a/src/main/java/org/tudo/sse/utils/IndexIterator.java b/src/main/java/org/tudo/sse/utils/IndexIterator.java index 7b1ee27..d320e0d 100644 --- a/src/main/java/org/tudo/sse/utils/IndexIterator.java +++ b/src/main/java/org/tudo/sse/utils/IndexIterator.java @@ -93,28 +93,37 @@ public Package processPackage(String information, String checksum) { * @see Package */ public IndexInformation processIndex(Map item) { + String uVal = item.get("u"); //process the G:A:V tuple - if(item.get("u") != null) { - ArtifactIdent temp = processArtifactIdent(item.get("u")); + if(uVal != null) { + ArtifactIdent temp = processArtifactIdent(uVal); - //Create an artifact using the values found in the 'i' and '1' tags - if(item.get("i") != null) { - String[] parts = item.get("i").split(IndexWalker.splitPattern); + return processIndex(item, temp); + } + return null; + } - Package tmpPackage = new Package(parts[0], Long.parseLong(parts[1]), Long.parseLong(parts[2]), Integer.parseInt(parts[3]), Integer.parseInt(parts[4]), Integer.parseInt(parts[5]), item.get("1")); + private IndexInformation processIndex(Map item, ArtifactIdent ident){ + String iVal = item.get("i"); - IndexInformation t = new IndexInformation(temp, tmpPackage); - t.setName(item.get("n")); - t.setIndex(index); - index++; + //Create an artifact using the values found in the 'i' and '1' tags + if(iVal != null) { + String[] parts = iVal.split(IndexWalker.splitPattern); - if(index != 0 && index % 500000 == 0){ - log.info("{} indexes have been processed.", index); - } + Package tmpPackage = new Package(parts[0], Long.parseLong(parts[1]), Long.parseLong(parts[2]), Integer.parseInt(parts[3]), Integer.parseInt(parts[4]), Integer.parseInt(parts[5]), item.get("1")); + + IndexInformation t = new IndexInformation(ident, tmpPackage); + t.setName(item.get("n")); + t.setIndex(index); + index++; - return t; + if(index != 0 && index % 500000 == 0){ + log.info("{} indexes have been processed.", index); } + + return t; } + return null; } @@ -139,7 +148,8 @@ public boolean hasNext() { //pass nextArtifact to currentArtifact if(nextArtifact == null) { - while (cr.hasNext() && currentArtifact == null) { + while (currentArtifact == null && cr.hasNext()) { + // If this fails with an exception, then there is really nothing we can do - let the exception bubble up currentArtifact = processIndex(cr.next()); } } else { @@ -149,21 +159,33 @@ public boolean hasNext() { //keep iterating the indexReader until the gav is different from the one in currentArtifact while (cr.hasNext()) { - Map curInfo = cr.next(); + Map currentEntry; - if (curInfo.get("u") == null) { - break; + try { + currentEntry = cr.next(); + } catch(RuntimeException rx){ + log.error("Failed to get entry from index: " + rx.getMessage()); + currentEntry = null; } - if (!(currentArtifact.getIdent().getCoordinates().equals(processArtifactIdent(curInfo.get("u")).getCoordinates()))) { + if (currentEntry == null) break; + + String currentUVal = currentEntry.get("u"); - //store into additional variable - nextArtifact = processIndex(curInfo); + if(currentUVal == null) break; + + final String currentArtifactGAV = currentArtifact.getIdent().getCoordinates(); + final ArtifactIdent currentEntryIdent = processArtifactIdent(currentUVal); + + if(!currentArtifactGAV.equals(currentEntryIdent.getCoordinates())){ + nextArtifact = processIndex(currentEntry, currentEntryIdent); break; } - if(curInfo.get("i") != null) { - currentArtifact.addAPackage(processPackage(curInfo.get("i"), curInfo.get("1"))); + String currentIVal = currentEntry.get("i"); + + if(currentIVal != null) { + currentArtifact.addAPackage(processPackage(currentIVal, currentEntry.get("1"))); index++; } } diff --git a/src/test/resources/PomInputs.json b/src/test/resources/PomInputs.json index 602b6c6..8b75470 100644 --- a/src/test/resources/PomInputs.json +++ b/src/test/resources/PomInputs.json @@ -362,9 +362,9 @@ "org.springframework.boot:spring-boot-starter-web:2.5.0:compile", "com.fasterxml.jackson.core:jackson-databind:2.10.0:compile", "org.hibernate:hibernate-core:5.3.0.CR2:compile", - "com.google.guava:guava:33.3.0-jre:compile", + "com.google.guava:guava:33.3.1-jre:compile", "org.apache.httpcomponents:httpclient:4.5.13:compile", - "commons-io:commons-io:2.16.1:compile" + "commons-io:commons-io:2.17.0:compile" ] ], From 5240f27e32095a01ec7ac13be033d713524ca559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20D=C3=BCsing?= Date: Tue, 24 Sep 2024 12:49:15 +0200 Subject: [PATCH 2/3] Reduce memory footprint of artifact ident --- .../java/org/tudo/sse/model/ArtifactIdent.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/tudo/sse/model/ArtifactIdent.java b/src/main/java/org/tudo/sse/model/ArtifactIdent.java index 99335dc..44442c6 100644 --- a/src/main/java/org/tudo/sse/model/ArtifactIdent.java +++ b/src/main/java/org/tudo/sse/model/ArtifactIdent.java @@ -39,9 +39,9 @@ public class ArtifactIdent { */ private String version; /** - * The repository where this artifact can be found. + * The repository where this artifact can be found - if different from the central Repo */ - private String repository; + private String customRepository; private static final Logger log = LogManager.getLogger(ArtifactIdent.class); @@ -49,14 +49,13 @@ public ArtifactIdent(String groupID, String artifactID, String version) { this.artifactID = artifactID; this.groupID = groupID; this.version = version; - this.repository = CENTRAL_REPOSITORY_URL; } public ArtifactIdent(ArtifactIdent toCopy) { this.groupID = toCopy.groupID; this.artifactID = toCopy.artifactID; this.version = toCopy.version; - this.repository = toCopy.repository; + this.customRepository = toCopy.customRepository; } /** @@ -128,7 +127,8 @@ public void setVersion(String version) { * @return repository */ public String getRepository() { - return repository; + if(this.customRepository != null) return this.customRepository; + else return CENTRAL_REPOSITORY_URL; } /** @@ -136,7 +136,7 @@ public String getRepository() { * @param repository new repository value */ public void setRepository(String repository) { - this.repository = repository; + this.customRepository = repository; } /** @@ -157,7 +157,7 @@ public String getCoordinates() { */ public URI getMavenCentralPomUri() { try { - if(repository.equals("https://repo1.maven.org/maven2/")) { + if(customRepository == null) { return MavenCentralRepository.buildPomFileURI(this); } else { return MavenCentralRepository.buildSecondaryPomFileURI(this, getRepository()); From 9ef389a445975f1d3d66c415d8d7898e1925d32e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20D=C3=BCsing?= Date: Tue, 24 Sep 2024 13:14:29 +0200 Subject: [PATCH 3/3] Add recovery code for SSL connection resets --- .../org/tudo/sse/utils/IndexIterator.java | 61 ++++++++++++++++--- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/tudo/sse/utils/IndexIterator.java b/src/main/java/org/tudo/sse/utils/IndexIterator.java index d320e0d..9861d6e 100644 --- a/src/main/java/org/tudo/sse/utils/IndexIterator.java +++ b/src/main/java/org/tudo/sse/utils/IndexIterator.java @@ -8,6 +8,7 @@ import org.tudo.sse.model.index.Package; import org.tudo.sse.model.index.IndexInformation; +import javax.net.ssl.SSLException; import java.io.IOException; import java.net.URI; import java.util.Iterator; @@ -22,8 +23,10 @@ public class IndexIterator implements Iterator { private long index; - private final IndexReader ir; - private final Iterator> cr; + + private final URI baseUri; + private IndexReader ir; + private Iterator> cr; private IndexInformation currentArtifact; private IndexInformation nextArtifact; private boolean prevHasNext; @@ -32,6 +35,7 @@ public class IndexIterator implements Iterator { public IndexIterator(URI base) throws IOException { + baseUri = base; ir = new IndexReader(null, new HttpResourceHandler(base.resolve(".index/"))); cr = ir.iterator().next().iterator(); index = 0; @@ -40,22 +44,36 @@ public IndexIterator(URI base) throws IOException { } public IndexIterator(URI base, long startingIndex) throws IOException { - ir = new IndexReader(null, new HttpResourceHandler(base.resolve(".index/"))); - cr = ir.iterator().next().iterator(); - index = 0; + this(base); while(cr.hasNext() && index != startingIndex) { cr.next(); index++; } - currentArtifact = null; - nextArtifact = null; } public void closeReader() throws IOException { ir.close(); } + private void recoverConnectionReset() throws IOException{ + long indexPos = getIndex(); + log.info("Recovering from connection reset at index {}", indexPos); + + + ir = new IndexReader(null, new HttpResourceHandler(baseUri.resolve(".index/"))); + cr = ir.iterator().next().iterator(); + index = 0; + + while(cr.hasNext() && index < indexPos){ + cr.next(); + index++; + if(index % 1000000 == 0) log.debug("Skipping indices for recovery, {} processed so far ...", index); + } + + log.info("Recovery successful, reset chunk reader to index {}.", indexPos); + } + /** * This method takes in a string of the gav tuple and creates an artifactIdent from it. * @param gav string version of an artifact identifier "g:a:v" @@ -149,8 +167,33 @@ public boolean hasNext() { //pass nextArtifact to currentArtifact if(nextArtifact == null) { while (currentArtifact == null && cr.hasNext()) { - // If this fails with an exception, then there is really nothing we can do - let the exception bubble up - currentArtifact = processIndex(cr.next()); + try { + // This may fail with an SSL connection reset exception... + currentArtifact = processIndex(cr.next()); + } catch(RuntimeException rx){ + + // Try to find out if this read error was caused by an SSL connection reset. + boolean causedBySsl = false; + Throwable current = rx; + while(!causedBySsl && current.getCause() != null){ + current = current.getCause(); + causedBySsl = current instanceof SSLException; + } + + if(rx.getMessage().contains("read error") && causedBySsl){ + // If so, try to recover by re-initializing the reader (and skipping to the right position) + try { + recoverConnectionReset(); + } catch(Exception x){ + log.error("Recovery unsuccessful: " + x.getMessage()); + throw new RuntimeException(x); + } + } else { + // Don't try to handle other exceptions with recovery + throw rx; + } + } + } } else { currentArtifact = nextArtifact;