From 42111243b2f1874930dfa4de85648d261309ce97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andris=20Dobi=C4=8Dinaitis?= Date: Wed, 26 Jun 2024 01:37:55 +0300 Subject: [PATCH] Added the ability to exclude articles that are accessible only to paying subscribers. --- .github/workflows/sync.yml | 1 + readme.md | 4 ++- .../feedreader/cli/commands/MainCommand.java | 19 +++++++----- .../feedreader/dto/SyncSettings.java | 2 +- .../feedreader/services/SyncService.java | 16 ++++++++++ .../feedreader/services/SyncServiceTest.java | 29 +++++++++++++------ 6 files changed, 53 insertions(+), 18 deletions(-) diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index f51fc36..dbf2e3c 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -15,6 +15,7 @@ jobs: FEED_READER_DEBUG: ${{ vars.FEED_READER_DEBUG }} FEED_READER_READ_BUTTON_LABEL: ${{ vars.FEED_READER_READ_BUTTON_LABEL }} FEED_READER_EXCLUDE_CATEGORIES: ${{ vars.FEED_READER_EXCLUDE_CATEGORIES }} + FEED_READER_EXCLUDE_PAYWALLED: ${{ vars.FEED_READER_EXCLUDE_PAYWALLED }} steps: - name: Download JAR from the latest release uses: robinraju/release-downloader@v1.9 diff --git a/readme.md b/readme.md index ff9e18a..1a972f1 100644 --- a/readme.md +++ b/readme.md @@ -10,7 +10,7 @@ Actions [workflow](https://github.com/dobicinaitis/apollo-lv-to-telegram/actions ## CLI usage ```commandline -java -jar apollo-lv-to-telegram.jar [-dhV] [-u=URL] [-t=TOKEN] [-c=CHANNEL_ID] [-s=FILE] [-r=LABEL] [-e=CATEGORY[,CATEGORY...]]... +java -jar apollo-lv-to-telegram.jar [-dhVp] [-u=URL] [-t=TOKEN] [-c=CHANNEL_ID] [-s=FILE] [-r=LABEL] [-e=CATEGORY[,CATEGORY...]]... OPTIONS -u, --url=URL News feed RSS URL (default: https://www.apollo.lv/rss). @@ -21,6 +21,7 @@ OPTIONS -r, --read-button-label=LABEL Label for the "Read" button in Telegram. -e, --exclude-categories=CATEGORY[,CATEGORY...] List of article categories to exclude. + -p, --exclude-paywalled Exclude articles that are accessible only to paying subscribers. 
-V, --version Print version information and exit. -d, --debug Print debug information. -h, --help Show this help message and exit. @@ -40,6 +41,7 @@ Most CLI parameters can also be provided via environment variables. | `--status-file` | `FEED_READER_STATUS_FILE` | last-sync-status.json | | `--read-button-label` | `FEED_READER_READ_BUTTON_LABEL` | Read | | `--exclude-categories` | `FEED_READER_EXCLUDE_CATEGORIES` | sports,horoscopes | +| `--exclude-paywalled` | `FEED_READER_EXCLUDE_PAYWALLED` | `true`/`false` | | `--debug` | `FEED_READER_DEBUG` | `true`/`false` | This can be useful when running the application inside a container, to hide sensitive information from CI/CD logs, diff --git a/src/main/java/dev/dobicinaitis/feedreader/cli/commands/MainCommand.java b/src/main/java/dev/dobicinaitis/feedreader/cli/commands/MainCommand.java index 7f347a0..61ae5fd 100644 --- a/src/main/java/dev/dobicinaitis/feedreader/cli/commands/MainCommand.java +++ b/src/main/java/dev/dobicinaitis/feedreader/cli/commands/MainCommand.java @@ -70,6 +70,10 @@ private void setExcludedCategories(final List excludedCategories) { } } + @Option(names = {"-p", "--exclude-paywalled"}, defaultValue = "${FEED_READER_EXCLUDE_PAYWALLED:-false}", + description = "Exclude articles that are accessible only to paying subscribers.", order = 7) + private boolean excludePaywalled; + @Option(names = "--no-sync", hidden = true, defaultValue = "${FEED_NO_SYNC:-false}", description = "A hidden parameter used to ease testing.") private boolean syncDisabled; @@ -85,13 +89,14 @@ public void run() { } log.info("Starting feed sync, RSS URL: {}", url); - final SyncSettings syncSettings = SyncSettings.builder(). - rssUrl(url). - telegramBotToken(botToken). - telegramChannelId(channelId). - statusFile(statusFile). - excludedCategories(excludedCategories). 
- build(); + final SyncSettings syncSettings = SyncSettings.builder() + .rssUrl(url) + .telegramBotToken(botToken) + .telegramChannelId(channelId) + .statusFile(statusFile) + .excludedCategories(excludedCategories) + .excludePaywalled(excludePaywalled) + .build(); final SyncService syncService = new SyncService(syncSettings); syncService.sync(); } diff --git a/src/main/java/dev/dobicinaitis/feedreader/dto/SyncSettings.java b/src/main/java/dev/dobicinaitis/feedreader/dto/SyncSettings.java index 66cc93e..15658c8 100644 --- a/src/main/java/dev/dobicinaitis/feedreader/dto/SyncSettings.java +++ b/src/main/java/dev/dobicinaitis/feedreader/dto/SyncSettings.java @@ -9,10 +9,10 @@ @Data @Builder public class SyncSettings { - private String rssUrl; private String telegramBotToken; private String telegramChannelId; private File statusFile; private List excludedCategories; + private boolean excludePaywalled; } diff --git a/src/main/java/dev/dobicinaitis/feedreader/services/SyncService.java b/src/main/java/dev/dobicinaitis/feedreader/services/SyncService.java index cf8d486..28c7d04 100644 --- a/src/main/java/dev/dobicinaitis/feedreader/services/SyncService.java +++ b/src/main/java/dev/dobicinaitis/feedreader/services/SyncService.java @@ -76,6 +76,10 @@ public void sync() { log.info("Updating paywall flags."); articles.parallelStream().forEach(article -> article.setPaywalled(hasPaywallLabel(article.getLink()))); + if (settings.isExcludePaywalled()) { + removePaywalledArticles(articles); + } + log.info("Posting {} new articles to Telegram.", articles.size()); final Article lastPostedArticle = telegram.postArticles(articles); @@ -201,6 +205,18 @@ protected void removeExcludedCategories(List items) { } } + /** + * Removes paywalled articles from the list. + * + * @param articles list of articles to filter; paywalled ones are removed in place + */ + protected void removePaywalledArticles(List<Article>
articles) { + final int initialSize = articles.size(); + articles.removeIf(Article::isPaywalled); + final int removedCount = initialSize - articles.size(); + log.info("Removed {} paywalled article{}, {} remaining.", removedCount, removedCount == 1 ? "" : "s", articles.size()); + } + /** * Converts a list of strings to lowercase. * diff --git a/src/test/java/dev/dobicinaitis/feedreader/services/SyncServiceTest.java b/src/test/java/dev/dobicinaitis/feedreader/services/SyncServiceTest.java index d28a719..25e05c8 100644 --- a/src/test/java/dev/dobicinaitis/feedreader/services/SyncServiceTest.java +++ b/src/test/java/dev/dobicinaitis/feedreader/services/SyncServiceTest.java @@ -24,11 +24,12 @@ class SyncServiceTest { private static final TestFeedServer feedServer = new TestFeedServer(); + private SyncSettings syncSettings; private SyncService syncService; @BeforeEach void setUp() { - final SyncSettings syncSettings = SyncSettings.builder() + syncSettings = SyncSettings.builder() .rssUrl(feedServer.getFeedUrl()) .telegramBotToken("bot-token") .telegramChannelId("channel-id") @@ -115,8 +116,8 @@ void shouldReadSyncStatusFromFile() throws Exception { // given final FeedReaderService feedReader = new FeedReaderService(feedServer.getFeedUrl()); final List testFeedArticles = feedReader.getItems(); - final String title = testFeedArticles.get(0).getTitle().orElseThrow(); - final ZonedDateTime publicationDate = testFeedArticles.get(0).getPubDateZonedDateTime().orElseThrow(); + final String title = testFeedArticles.getFirst().getTitle().orElseThrow(); + final ZonedDateTime publicationDate = testFeedArticles.getFirst().getPubDateZonedDateTime().orElseThrow(); final File statusFile = prepareStatusFile(title, publicationDate); syncService.setStatusFile(statusFile); // when @@ -153,7 +154,7 @@ void shouldRemoveProcessedArticles() { syncService.removeProcessedArticles(articles, syncStatus); // then assertEquals(1, articles.size(), "Only 1 article should be left."); - 
assertEquals("new", articles.get(0).getTitle(), "The newest article should be left."); + assertEquals("new", articles.getFirst().getTitle(), "The newest article should be left."); } @Test @@ -175,11 +176,7 @@ void shouldExcludeUnwantedCategories() { ItemWrapper.builder().title("boring 1").categories(List.of("news", "gossip")).build().toRssItem(), ItemWrapper.builder().title("boring 2").categories(List.of("news", "technology", "pascal")).build().toRssItem() )); - final SyncSettings syncSettings = SyncSettings.builder() - .excludedCategories(List.of("gossip", "pascal")) - .telegramBotToken("bot-token") - .build(); - syncService = new SyncService(syncSettings); + syncSettings.setExcludedCategories(List.of("gossip", "pascal")); // when syncService.removeExcludedCategories(rssItems); // then @@ -188,6 +185,20 @@ void shouldExcludeUnwantedCategories() { assertEquals("interesting 2", rssItems.get(1).getTitle().orElse(""), "The second article should be left."); } + @Test + void shouldExcludePaywalledArticles() { + // given + final List<Article>
articles = new ArrayList<>(Arrays.asList( + Article.builder().title("free").build(), + Article.builder().title("paywalled").paywalled(true).build() + )); + // when + syncService.removePaywalledArticles(articles); + // then + assertEquals(1, articles.size(), "Only 1 article should be left."); + assertEquals("free", articles.getFirst().getTitle(), "The free article should be left."); + } + /** * Prepares a temporary status file with the given title and publication date. *