Commit 486213c: move html utility functions to util module

shouya committed Sep 27, 2024
1 parent 02c3293
Showing 7 changed files with 12 additions and 7 deletions.
3 changes: 1 addition & 2 deletions src/feed.rs

@@ -11,9 +11,8 @@ use serde::Deserialize;
 use serde::Serialize;
 use url::Url;

-use crate::html::convert_relative_url;
-use crate::html::html_body;
 use crate::source::FromScratch;
+use crate::util::{convert_relative_url, html_body};
 use crate::Error;
 use crate::Result;
4 changes: 2 additions & 2 deletions src/filter/full_text.rs

@@ -9,7 +9,7 @@ use url::Url;

 use crate::client::{self, Client};
 use crate::feed::{Feed, Post};
-use crate::html::convert_relative_url;
+use crate::util::convert_relative_url;
 use crate::util::TimedLruCache;
 use crate::{ConfigError, Error, Result};

@@ -204,7 +204,7 @@ fn strip_post_content(
   if simplify {
     text = super::simplify_html::simplify(&text, link).unwrap_or(text);
   } else {
-    text = crate::html::html_body(&text);
+    text = crate::util::html_body(&text);
   }

   if let Some(k) = keep_element.as_ref() {
2 changes: 1 addition & 1 deletion src/filter/highlight.rs

@@ -5,7 +5,7 @@ use schemars::JsonSchema;
 use scraper::{Html, Node};
 use serde::{Deserialize, Serialize};

-use crate::{feed::Feed, html::fragment_root_node_id, ConfigError, Result};
+use crate::{feed::Feed, util::fragment_root_node_id, ConfigError, Result};

 #[derive(
   JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash,
2 changes: 1 addition & 1 deletion src/js/dom.rs

@@ -8,7 +8,7 @@ use rquickjs::{
 };
 use scraper::ElementRef;

-use crate::{html::fragment_root_node_id, Result};
+use crate::{util::fragment_root_node_id, Result};

 #[rquickjs::class]
 #[derive(Clone)]
1 change: 0 additions & 1 deletion src/main.rs

@@ -5,7 +5,6 @@ mod error;
 mod feed;
 mod filter;
 mod filter_pipeline;
-mod html;
 mod js;
 mod otf_filter;
 mod server;
4 changes: 4 additions & 0 deletions src/util.rs

@@ -1,7 +1,11 @@
+mod html;
+
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use url::Url;

+pub use self::html::{convert_relative_url, fragment_root_node_id, html_body};
+
 pub const USER_AGENT: &str =
   concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
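
The `mod html;` plus `pub use self::html::…` pair is the common Rust façade pattern: the submodule stays private while its items are exposed through `crate::util`, so call sites are insulated from the file layout. A minimal self-contained sketch of the pattern, with module and function names invented for illustration (not taken from this repository):

// Façade pattern in miniature: a private submodule re-exported
// through its parent. `facade`, `inner`, and `helper` are invented names.
mod facade {
  mod inner {
    pub fn helper() -> &'static str {
      "hello from the private submodule"
    }
  }

  // Callers write `facade::helper`; moving `inner` later won't break them.
  pub use self::inner::helper;
}

fn main() {
  println!("{}", facade::helper());
}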
3 changes: 3 additions & 0 deletions src/html.rs → src/util/html.rs

@@ -7,6 +7,7 @@ const RELATIVE_URL_PROPERTIES: [(&str, &str); 3] = [
   ("*[srcset]", "srcset"),
 ];

+/// Convert relative URLs in an HTML document to absolute URLs.
 pub fn convert_relative_url(html: &mut Html, base_url: &str) {
   use html5ever::{namespace_url, ns, LocalName, QualName};
   lazy_static::lazy_static! {

@@ -46,6 +47,7 @@ pub fn convert_relative_url(html: &mut Html, base_url: &str) {
   }
 }

+/// Extract the body HTML from an HTML document string.
 pub fn html_body(html: &str) -> String {
   Html::parse_document(html)
     .select(&Selector::parse("body").unwrap())

@@ -54,6 +56,7 @@ pub fn html_body(html: &str) -> String {
     .unwrap_or_else(|| html.to_string())
 }

+/// Get the root node ID of a fragment.
 pub fn fragment_root_node_id(mut node: NodeRef<'_, scraper::Node>) -> NodeId {
   let val = node.value();
   if val.is_fragment() || val.is_document() {
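
For reference, a hedged usage sketch of the three relocated helpers, based only on the signatures visible in this diff. It assumes the surrounding crate (the `crate::util` façade) and the `scraper` crate; it is not code from the commit:

// Usage sketch (assumed context: inside this crate, with `scraper` available).
use scraper::Html;

fn demo() {
  // Rewrite relative href/src/srcset attributes against a base URL, in place.
  let mut doc = Html::parse_document(r#"<a href="/post/1">post</a>"#);
  crate::util::convert_relative_url(&mut doc, "https://example.com/");

  // Extract the <body> markup; per the `unwrap_or_else` above, input that
  // yields no body falls back to the original string.
  let body = crate::util::html_body("<html><body><p>hi</p></body></html>");

  // Resolve the root node id of a parsed HTML fragment.
  let fragment = Html::parse_fragment("<p>hi</p>");
  let root_id = crate::util::fragment_root_node_id(fragment.tree.root());

  let _ = (body, root_id);
}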
