Skip to content

Commit

Permalink
swap summary with content (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
zaknesler authored May 27, 2024
1 parent b7c97da commit 41f2c35
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
29 changes: 23 additions & 6 deletions crates/blend-feed/src/parse/entry.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use super::{get_feed, parse_url};
use crate::{error::FeedResult, extract::*, ParsedEntry};

/// Minimum length the extracted summary must exceed before it is promoted to the entry's
/// content. The swap is only considered when the entry has no content of its own (missing or
/// empty).
///
/// NOTE(review): the comparison uses `summary.len()` on the extracted summary HTML; if that
/// value is a `String`, the length is measured in bytes rather than characters — confirm this
/// is the intended unit.
const SWAP_THRESHOLD: usize = 100;

/// Fetch feed and process each entry as needed
pub async fn parse_entries(url: &str) -> FeedResult<Vec<ParsedEntry>> {
let url = parse_url(url)?;
Expand All @@ -20,16 +23,30 @@ pub async fn parse_entries(url: &str) -> FeedResult<Vec<ParsedEntry>> {
.and_then(|media| media.content.first().and_then(|content| content.url.clone()))
.map(|url| url.to_string());

let mut summary_html = entry
.summary
.as_ref()
.map(|text| extract_stylistic_html(&text.content, &url.base));

let mut content_html = entry
.content
.and_then(|content| content.body.map(|content| extract_html(&content, &url.base)));

// Some feeds may return article content as the summary/teaser. If this is the case, we want to
// swap the (empty) content with the summary, but only if it's a respectable length.
if content_html.as_ref().map_or(true, |html| html.is_empty())
&& summary_html.as_ref().is_some_and(|summary| summary.len() > SWAP_THRESHOLD)
{
content_html = entry.summary.map(|text| extract_html(&text.content, &url.base));
summary_html = None;
}

ParsedEntry {
id: entry.id,
url: entry.links.first().map(|link| link.href.clone()),
title: entry.title.map(|text| extract_text(&text.content)),
summary_html: entry
.summary
.map(|text| extract_stylistic_html(&text.content, &url.base)),
content_html: entry.content.and_then(|content| {
content.body.map(|content| extract_html(&content, &url.base))
}),
summary_html,
content_html,
media_url,
published_at: entry.published,
updated_at: entry.updated,
Expand Down
4 changes: 3 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ async fn main() -> error::BlendResult<()> {

args::Command::Parse { url } => {
let feed = blend_feed::parse_feed(&url).await?;
dbg!(&feed);
let entries = blend_feed::parse_entries(&feed.url_feed).await?;

dbg!(&feed, &entries);
}

crate::args::Command::Start => {
Expand Down

0 comments on commit 41f2c35

Please sign in to comment.