Skip to content

Commit

Permalink
add more fields
Browse files Browse the repository at this point in the history
  • Loading branch information
zaknesler committed May 22, 2024
1 parent bfd1d09 commit 19712c5
Show file tree
Hide file tree
Showing 15 changed files with 3,305 additions and 2,646 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/blend-db/migrations/20240422000000_feeds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CREATE TABLE IF NOT EXISTS feeds (
url_site TEXT,
title TEXT,
title_display TEXT,
favicon_url TEXT,
favicon_b64 BLOB,
published_at DATETIME,
updated_at DATETIME,
Expand Down
3 changes: 2 additions & 1 deletion crates/blend-db/migrations/20240430000000_entries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ CREATE TABLE IF NOT EXISTS entries (
id TEXT NOT NULL,
url TEXT,
title TEXT,
summary TEXT,
summary_html TEXT,
content_html TEXT,
content_scraped_html TEXT,
published_at DATETIME,
updated_at DATETIME,
read_at DATETIME,
saved_at DATETIME,
UNIQUE(feed_uuid, id),
CONSTRAINT fk_feed FOREIGN KEY (feed_uuid) REFERENCES feeds(uuid) ON DELETE CASCADE
);
3 changes: 2 additions & 1 deletion crates/blend-db/src/model/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ pub struct Entry {
pub id: String,
pub url: String,
pub title: Option<String>,
pub summary: Option<String>,
pub summary_html: Option<String>,
#[sqlx(default)]
pub content_html: Option<String>,
#[sqlx(default)]
pub content_scraped_html: Option<String>,
pub published_at: Option<DateTime<Utc>>,
pub updated_at: Option<DateTime<Utc>>,
pub read_at: Option<DateTime<Utc>>,
pub saved_at: Option<DateTime<Utc>>,
}
3 changes: 2 additions & 1 deletion crates/blend-db/src/model/feed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ pub struct Feed {
pub url_site: Option<String>,
pub title: Option<String>,
pub title_display: Option<String>,
pub favicon_b64: Option<Vec<u8>>,
pub favicon_b64: Option<String>,
pub favicon_url: Option<String>,
pub published_at: Option<DateTime<Utc>>,
pub updated_at: Option<DateTime<Utc>>,
}
Expand Down
10 changes: 5 additions & 5 deletions crates/blend-db/src/repo/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub struct CreateEntryParams {
pub id: String,
pub url: Option<String>,
pub title: Option<String>,
pub summary: Option<String>,
pub summary_html: Option<String>,
pub content_html: Option<String>,
pub published_at: Option<DateTime<Utc>>,
pub updated_at: Option<DateTime<Utc>>,
Expand Down Expand Up @@ -86,7 +86,7 @@ impl EntryRepo {
let el = filter.sort.query_elements();
let el_inv = filter.sort.query_elements_inverse();

let mut query = QueryBuilder::<Sqlite>::new("SELECT uuid, feed_uuid, id, url, title, summary, published_at, updated_at, read_at FROM entries WHERE 1=1");
let mut query = QueryBuilder::<Sqlite>::new("SELECT uuid, feed_uuid, id, url, title, summary_html, published_at, updated_at, read_at FROM entries WHERE 1=1");

match filter.view {
View::All => query.push(""),
Expand Down Expand Up @@ -176,14 +176,14 @@ impl EntryRepo {
return Ok(vec![]);
}

let mut query = QueryBuilder::<Sqlite>::new("INSERT INTO entries (feed_uuid, uuid, id, url, title, summary, content_html, published_at, updated_at) ");
let mut query = QueryBuilder::<Sqlite>::new("INSERT INTO entries (feed_uuid, uuid, id, url, title, summary_html, content_html, published_at, updated_at) ");
query.push_values(entries.iter(), |mut b, entry| {
b.push_bind(feed_uuid)
.push_bind(uuid::Uuid::new_v4())
.push_bind(entry.id.clone())
.push_bind(entry.url.clone())
.push_bind(entry.title.clone())
.push_bind(entry.summary.clone())
.push_bind(entry.summary_html.clone())
.push_bind(entry.content_html.clone())
.push_bind(entry.published_at)
.push_bind(entry.updated_at);
Expand All @@ -194,7 +194,7 @@ impl EntryRepo {
DO UPDATE SET
url = excluded.url,
title = excluded.title,
summary = excluded.summary,
summary_html = excluded.summary_html,
content_html = excluded.content_html,
updated_at = excluded.updated_at
RETURNING uuid
Expand Down
1 change: 0 additions & 1 deletion crates/blend-feed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ license.workspace = true
ammonia = "4.0"
chrono = { workspace = true, features = ["serde"] }
feed-rs = { workspace = true }
html5ever = "0.27"
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions crates/blend-feed/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ async fn get_feed(url: &str) -> FeedResult<feed_rs::model::Feed> {
pub async fn parse_feed(url: &str) -> FeedResult<ParsedFeed> {
let feed = get_feed(url).await?;

// Parse favicon URL to use until we can convert the remote image into binary data stored in the db
let favicon_url = feed
.icon
.or_else(|| feed.logo)
Expand Down
23 changes: 21 additions & 2 deletions crates/blend-feed/src/sanitize/html.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
use ammonia::Builder;
use std::collections::HashSet;

/// Extract safe and sanitized HTML elements.
const REMOVE_TAGS: [&str; 1] = ["article"];

/// Sanitize HTML input, allowing only safe elements.
pub fn sanitize_html(src: &str) -> String {
Builder::default().clean(src).to_string()
Builder::default().rm_tags(HashSet::from(REMOVE_TAGS)).clean(src).to_string()
}

#[cfg(test)]
mod test {
    use super::*;

    /// Checks `sanitize_html` against a fixed fragment and its expected output.
    #[test]
    fn it_keeps_only_safe_elements() {
        // The input wraps ordinary paragraph/emphasis/link markup in an
        // `<article>` element, puts a `class` attribute on one paragraph, and
        // appends `<script>` and `<style>` elements.
        let input = r#"<article><p>Some body text that we <em>want</em> to keep.</p><p class="read-more">[<a href="https://example.com">Read More</a>]</p><script>alert("gotcha")</script><style>body { display: none }</style></article>"#;

        // Expected result: the `<article>` wrapper, the `class` attribute and
        // the `<script>`/`<style>` elements are gone, and the link has gained
        // `rel="noopener noreferrer"`.
        let expected = r#"<p>Some body text that we <em>want</em> to keep.</p><p>[<a href="https://example.com" rel="noopener noreferrer">Read More</a>]</p>"#;

        assert_eq!(sanitize_html(input), expected);
    }
}
2 changes: 2 additions & 0 deletions crates/blend-feed/src/sanitize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ pub use text::sanitize_text;

mod html;
pub use html::sanitize_html;

// TODO: use `.url_relative(UrlRelative::RewriteWithBase(...))` with ammonia and pass in site URL to rewrite relative URLs
2 changes: 1 addition & 1 deletion crates/blend-feed/src/sanitize/text.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use ammonia::Builder;

/// Sanitize HTML input, allowing only text.
/// Sanitize HTML input, allowing only plain text
pub fn sanitize_text(src: &str) -> String {
    // Start from ammonia's empty builder rather than its default one.
    let text_only = Builder::empty();
    let cleaned = text_only.clean(src);
    cleaned.to_string()
}
Expand Down
2 changes: 1 addition & 1 deletion crates/blend-worker/src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub async fn fetch_entries(
id: entry.id,
url: entry.url,
title: entry.title,
summary: entry.summary_html,
summary_html: entry.summary_html,
content_html: entry.content_html,
published_at: entry.published_at,
updated_at: entry.updated_at,
Expand Down
Loading

0 comments on commit 19712c5

Please sign in to comment.