From 5cd4a00907f32f37becd8d81679b8a7357689ca1 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Mon, 9 Jan 2023 10:56:26 +0100 Subject: [PATCH] Update to crawl CC-MAIN-2022-49 --- .../java/org/dstadler/commoncrawl/index/DownloadURLIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/dstadler/commoncrawl/index/DownloadURLIndex.java b/src/main/java/org/dstadler/commoncrawl/index/DownloadURLIndex.java index 7b08463d..4f3cd2be 100644 --- a/src/main/java/org/dstadler/commoncrawl/index/DownloadURLIndex.java +++ b/src/main/java/org/dstadler/commoncrawl/index/DownloadURLIndex.java @@ -29,7 +29,7 @@ public class DownloadURLIndex { private static final Logger log = LoggerFactory.make(); // https://commoncrawl.org/connect/blog/ - public static final String CURRENT_CRAWL = "CC-MAIN-2022-33"; + public static final String CURRENT_CRAWL = "CC-MAIN-2022-49"; public static final File COMMON_CRAWL_FILE = new File("commoncrawl-" + CURRENT_CRAWL + ".txt"); private static final int START_INDEX = 0;