diff --git a/src/feed.rs b/src/feed.rs index 887976a..4f9fcc6 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -11,9 +11,8 @@ use serde::Deserialize; use serde::Serialize; use url::Url; -use crate::html::convert_relative_url; -use crate::html::html_body; use crate::source::FromScratch; +use crate::util::{convert_relative_url, html_body}; use crate::Error; use crate::Result; diff --git a/src/filter/full_text.rs b/src/filter/full_text.rs index 52f9999..a391e89 100644 --- a/src/filter/full_text.rs +++ b/src/filter/full_text.rs @@ -9,7 +9,7 @@ use url::Url; use crate::client::{self, Client}; use crate::feed::{Feed, Post}; -use crate::html::convert_relative_url; +use crate::util::convert_relative_url; use crate::util::TimedLruCache; use crate::{ConfigError, Error, Result}; @@ -204,7 +204,7 @@ fn strip_post_content( if simplify { text = super::simplify_html::simplify(&text, link).unwrap_or(text); } else { - text = crate::html::html_body(&text); + text = crate::util::html_body(&text); } if let Some(k) = keep_element.as_ref() { diff --git a/src/filter/highlight.rs b/src/filter/highlight.rs index c095d04..ca5c506 100644 --- a/src/filter/highlight.rs +++ b/src/filter/highlight.rs @@ -5,7 +5,7 @@ use schemars::JsonSchema; use scraper::{Html, Node}; use serde::{Deserialize, Serialize}; -use crate::{feed::Feed, html::fragment_root_node_id, ConfigError, Result}; +use crate::{feed::Feed, util::fragment_root_node_id, ConfigError, Result}; #[derive( JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, diff --git a/src/js/dom.rs b/src/js/dom.rs index 0ddb47b..685be4d 100644 --- a/src/js/dom.rs +++ b/src/js/dom.rs @@ -8,7 +8,7 @@ use rquickjs::{ }; use scraper::ElementRef; -use crate::{html::fragment_root_node_id, Result}; +use crate::{util::fragment_root_node_id, Result}; #[rquickjs::class] #[derive(Clone)] diff --git a/src/main.rs b/src/main.rs index d4268b4..165f85c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,6 @@ mod error; mod feed; mod filter; mod filter_pipeline; -mod html; mod js; mod otf_filter; mod server; diff --git a/src/util.rs b/src/util.rs index 54d6c8a..5ac3f5b 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,11 @@ +mod html; + use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use url::Url; +pub use self::html::{convert_relative_url, fragment_root_node_id, html_body}; + pub const USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")); diff --git a/src/html.rs b/src/util/html.rs similarity index 93% rename from src/html.rs rename to src/util/html.rs index c673037..b71b8c5 100644 --- a/src/html.rs +++ b/src/util/html.rs @@ -7,6 +7,7 @@ const RELATIVE_URL_PROPERTIES: [(&str, &str); 3] = [ ("*[srcset]", "srcset"), ]; +/// Convert relative URLs in an HTML document to absolute URLs. pub fn convert_relative_url(html: &mut Html, base_url: &str) { use html5ever::{namespace_url, ns, LocalName, QualName}; lazy_static::lazy_static! { @@ -46,6 +47,7 @@ pub fn convert_relative_url(html: &mut Html, base_url: &str) { } } +/// Extract the body HTML from an HTML document string. pub fn html_body(html: &str) -> String { Html::parse_document(html) .select(&Selector::parse("body").unwrap()) @@ -54,6 +56,7 @@ pub fn html_body(html: &str) -> String { .unwrap_or_else(|| html.to_string()) } +/// Get the root node ID of a fragment. pub fn fragment_root_node_id(mut node: NodeRef<'_, scraper::Node>) -> NodeId { let val = node.value(); if val.is_fragment() || val.is_document() {