From bbf6234d327a0953cd2c6adcf3cf8f0da6c4aca6 Mon Sep 17 00:00:00 2001 From: nullchinchilla Date: Sun, 17 Nov 2024 08:21:57 -0500 Subject: [PATCH] Add regex dependency and implement fix_rss function to clean up RSS feed content --- Cargo.lock | 19 ++++++++++--------- Cargo.toml | 1 + src/bindercore_v2.rs | 14 ++++++++++++-- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1dbdd70..0491792 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2087,6 +2087,7 @@ dependencies = [ "r2d2", "r2d2_postgres", "rand 0.7.3", + "regex", "reqwest", "rsa", "rusty_pool", @@ -4293,14 +4294,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", - "regex-syntax 0.8.2", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] @@ -4314,13 +4315,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax 0.8.5", ] [[package]] @@ -4331,9 +4332,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "replace_with" diff --git a/Cargo.toml b/Cargo.toml index 176282a..b50109e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,6 +84,7 @@ tmelcrypt = "0.2.7" sosistab2 = "0.8.16" http = "0.2" governor = "0.6.3" +regex = "1.11.1" [target.'cfg(not(target_env = "msvc"))'.dependencies] diff --git a/src/bindercore_v2.rs b/src/bindercore_v2.rs index 3002d2f..594dfdf 100644 --- a/src/bindercore_v2.rs +++ b/src/bindercore_v2.rs @@ -888,7 +888,8 @@ impl BinderCoreV2 { /// Gets announcements. pub async fn get_announcements(&self) -> String { - self.announcements_cache + let s = self + .announcements_cache .try_get_with((), async { let resp = reqwest::get("https://rsshub.app/telegram/channel/gephannounce_mirror") .compat() @@ -897,7 +898,8 @@ impl BinderCoreV2 { anyhow::Ok(String::from_utf8_lossy(&bts).to_string()) }) .await - .unwrap_or_else(|_| "Failed to fetch announcements".to_string()) + .unwrap_or_else(|_| "Failed to fetch announcements".to_string()); + fix_rss(&s) } async fn get_user_id(&self, credentials: &Credentials) -> Result, sqlx::Error> { @@ -978,6 +980,14 @@ impl BinderCoreV2 { } } +fn fix_rss(text: &str) -> String { + // Regular expression to match the forwarded pattern + let re = regex::Regex::new(r"

Forwarded[^<]*

").unwrap(); + + // Replace all matched patterns with empty string + re.replace_all(text.trim(), "").to_string() +} + /// Verify a captcha. async fn verify_captcha( captcha_service: &str,