diff --git a/src/site/markdown/release_notes.md b/src/site/markdown/release_notes.md index db58341ae0..76893f73e4 100644 --- a/src/site/markdown/release_notes.md +++ b/src/site/markdown/release_notes.md @@ -11,6 +11,7 @@ Full listing of changes and bug fixes are not available prior to release 1.2.0 a * Visualize snapshot density in bubble calendar using bubble background color. [#351](https://github.com/iipc/openwayback/issues/351) * Support Brotli (`br`) content-encoding. [#395](https://github.com/iipc/openwayback/pull/395) * Rewrite new `imagesrcset` attribute of the link element. [#394](https://github.com/iipc/openwayback/issues/394) +* Rewrite ftp and ftps scheme URIs in HTML. [#400](https://github.com/iipc/openwayback/pull/400) ### Bug fixes * Add proxy support to IP exclusion. [#260](https://github.com/iipc/openwayback/issues/260) diff --git a/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java b/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java index 6a835edae4..88fe038232 100644 --- a/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java +++ b/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java @@ -763,11 +763,18 @@ public void setRequestUrl(String urlStr) { // is known, adding an implied "http://" scheme if there doesn't appear // to be a scheme.. // TODO: make the default "http://" configurable. - if (!urlStr.startsWith(UrlOperations.HTTP_SCHEME) && !urlStr.startsWith(UrlOperations.HTTPS_SCHEME)) { + if (!urlStr.startsWith(UrlOperations.HTTP_SCHEME) && + !urlStr.startsWith(UrlOperations.HTTPS_SCHEME) && + !urlStr.startsWith(UrlOperations.FTP_SCHEME) && + !urlStr.startsWith(UrlOperations.FTPS_SCHEME)) { if(urlStr.startsWith("http:/")) { urlStr = UrlOperations.HTTP_SCHEME + urlStr.substring(6); } else if(urlStr.startsWith("https:/")) { urlStr = UrlOperations.HTTPS_SCHEME + urlStr.substring(7); + } else if(urlStr.startsWith("ftp:/")) { + urlStr = UrlOperations.FTP_SCHEME + urlStr.substring(5); + } else if(urlStr.startsWith("ftps:/")) { + urlStr = UrlOperations.FTPS_SCHEME + urlStr.substring(6); } else { if(UrlOperations.urlToScheme(urlStr) == null) { urlStr = UrlOperations.HTTP_SCHEME + urlStr; diff --git a/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java b/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java index c0b253437a..9af6f110c2 100644 --- a/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java +++ b/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java @@ -217,14 +217,22 @@ public String contextualizeUrl(final String url, String flags) { if (!trimmedUrl.startsWith("http://") && !trimmedUrl.startsWith("https://") && + !trimmedUrl.startsWith("ftp://") && + !trimmedUrl.startsWith("ftps://") && !trimmedUrl.startsWith("//") && !trimmedUrl.startsWith("http:\\\\/\\\\/") && !trimmedUrl.startsWith("http\\\\u00253A\\\\u00252F\\\\u00252F") && !trimmedUrl.startsWith("https:\\\\/\\\\/") && !trimmedUrl .startsWith("https\\\\u00253A\\\\u00252F\\\\u00252F") && + !trimmedUrl.startsWith("ftp:\\\\/\\\\/") && + !trimmedUrl.startsWith("ftp\\\\u00253A\\\\u00252F\\\\u00252F") && + !trimmedUrl.startsWith("ftps:\\\\/\\\\/") && + !trimmedUrl.startsWith("ftps\\\\u00253A\\\\u00252F\\\\u00252F") && !trimmedUrl.startsWith("http:\\/\\/") && !trimmedUrl.startsWith("https:\\/\\/") && + !trimmedUrl.startsWith("ftp:\\/\\/") && + !trimmedUrl.startsWith("ftps:\\/\\/") && !trimmedUrl.startsWith("/") && !trimmedUrl.startsWith(".")) { return url; diff --git a/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java b/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java index 397b6762d1..584ba1258e 100644 --- a/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java +++ b/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java @@ -57,6 +57,10 @@ public class UrlOperations { * FTP */ public final static String FTP_SCHEME = "ftp://"; + /** + * FTPS + */ + public final static String FTPS_SCHEME = "ftps://"; /** * MMS */ @@ -83,6 +87,7 @@ public class UrlOperations { HTTP_SCHEME, HTTPS_SCHEME, FTP_SCHEME, + FTPS_SCHEME, MMS_SCHEME, RTSP_SCHEME, WAIS_SCHEME diff --git a/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java b/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java index c7d82b04c7..1eac9b528b 100644 --- a/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java +++ b/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java @@ -97,6 +97,26 @@ public void testAnchorHrefRelative() throws Exception { assertEquals(expected, doEndToEnd(input)); } + public void testAnchorHrefFtp() throws Exception { + final String input = "" + + "foo.pdf" + + ""; + final String expected = "" + + "" + + "foo.pdf"; + assertEquals(expected, doEndToEnd(input)); + } + + public void testAnchorHrefFtps() throws Exception { + final String input = "" + + "foo.pdf" + + ""; + final String expected = "" + + "" + + "foo.pdf"; + assertEquals(expected, doEndToEnd(input)); + } + public void testAnchorHrefAbsoluteInJavascript() throws Exception { final String input = "" + "American Symphony Orchestra League" + diff --git a/wayback-core/src/test/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParserTest.java b/wayback-core/src/test/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParserTest.java index 48dc17d9fe..c8e9e16d55 100644 --- a/wayback-core/src/test/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParserTest.java +++ b/wayback-core/src/test/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParserTest.java @@ -90,6 +90,14 @@ public void testParseString() throws Exception { assertEquals("parsed request Url with https scheme missing a scheme slash", "https://foo.com/",r.getRequestUrl()); + r = p.parse("20070101000000/ftp:/foo.com/",ap); + assertEquals("parsed request Url with ftp scheme missing a scheme slash", + "ftp://foo.com/",r.getRequestUrl()); + + r = p.parse("20070101000000/ftps:/foo.com/",ap); + assertEquals("parsed request Url with ftps scheme missing a scheme slash", + "ftps://foo.com/",r.getRequestUrl()); + r = p.parse("20070101000000js_/http://foo.com/",ap); assertEquals("parsed request Url with js_ flag", "http://foo.com/",r.getRequestUrl());