diff --git a/crates/blend-feed/src/parse/entry.rs b/crates/blend-feed/src/parse/entry.rs index 536bd4a0..fed7fde0 100644 --- a/crates/blend-feed/src/parse/entry.rs +++ b/crates/blend-feed/src/parse/entry.rs @@ -1,6 +1,9 @@ use super::{get_feed, parse_url}; use crate::{error::FeedResult, extract::*, ParsedEntry}; +/// Length (as measured by `.len()`) that the extracted summary HTML must exceed before we treat it as the content; only applies when the entry has no content, or only empty content, of its own +const SWAP_THRESHOLD: usize = 100; + /// Fetch feed and process each entry as needed pub async fn parse_entries(url: &str) -> FeedResult> { let url = parse_url(url)?; @@ -20,16 +23,30 @@ pub async fn parse_entries(url: &str) -> FeedResult> { .and_then(|media| media.content.first().and_then(|content| content.url.clone())) .map(|url| url.to_string()); + let mut summary_html = entry + .summary + .as_ref() + .map(|text| extract_stylistic_html(&text.content, &url.base)); + + let mut content_html = entry + .content + .and_then(|content| content.body.map(|content| extract_html(&content, &url.base))); + + // Some feeds may return article content as the summary/teaser. If this is the case, we want to + // swap the (empty) content with the summary, but only if it's a respectable length. 
+ if content_html.as_ref().map_or(true, |html| html.is_empty()) + && summary_html.as_ref().is_some_and(|summary| summary.len() > SWAP_THRESHOLD) + { + content_html = entry.summary.map(|text| extract_html(&text.content, &url.base)); + summary_html = None; + } + ParsedEntry { id: entry.id, url: entry.links.first().map(|link| link.href.clone()), title: entry.title.map(|text| extract_text(&text.content)), - summary_html: entry - .summary - .map(|text| extract_stylistic_html(&text.content, &url.base)), - content_html: entry.content.and_then(|content| { - content.body.map(|content| extract_html(&content, &url.base)) - }), + summary_html, + content_html, media_url, published_at: entry.published, updated_at: entry.updated, diff --git a/src/main.rs b/src/main.rs index eae498d7..f66f299e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 +33,9 @@ async fn main() -> error::BlendResult<()> { args::Command::Parse { url } => { let feed = blend_feed::parse_feed(&url).await?; - dbg!(&feed); + let entries = blend_feed::parse_entries(&feed.url_feed).await?; + + dbg!(&feed, &entries); } crate::args::Command::Start => {