From ebe8b567fdb9d011f574fc9053ad3f981b63930f Mon Sep 17 00:00:00 2001
From: Zak Nesler
Date: Sun, 26 May 2024 18:21:35 -0400
Subject: [PATCH] feed refactor and processing (#37)
* begin feed refactor
* ui updates
* keep icon url
* debounce feed invalidation
* sanitize fields
* rm id [skip ci]
* add more fields
* add line [skip ci]
* fetch column
* fix favicon url
* save favicon and render
* more required fields
* rename module
* add saved to stats
* count saved in total
* 404 styles
* improve styles and rm local entry store
* debounce stats invalidation
* rename classes
* render html and display title
* rename query state hook
* scrape wip
* add scraped_at field
* libxml
* sudo
* initial readability port
* add scraping job
* check content_html
* fix check
* trim and retry urls
* ui tweaks
* scrape favicons
* use b64
* readme [skip ci]
---
Cargo.lock | 1104 +++++++++++-
Cargo.toml | 1 +
README.md | 2 +-
.../migrations/20240422000000_feeds.sql | 5 +-
.../migrations/20240430000000_entries.sql | 5 +-
.../migrations/20240504000000_feeds_stats.sql | 15 -
crates/blend-db/src/model/entry.rs | 5 +-
crates/blend-db/src/model/feed.rs | 8 +-
crates/blend-db/src/repo/entry.rs | 41 +-
crates/blend-db/src/repo/feed.rs | 51 +-
crates/blend-feed/Cargo.toml | 7 +
crates/blend-feed/src/error.rs | 6 +
crates/blend-feed/src/extract/html.rs | 25 +
crates/blend-feed/src/extract/mod.rs | 10 +
crates/blend-feed/src/extract/stylistic.rs | 69 +
crates/blend-feed/src/extract/text.rs | 46 +
crates/blend-feed/src/lib.rs | 43 +-
crates/blend-feed/src/model.rs | 42 +-
crates/blend-feed/src/parse/entry.rs | 46 +
crates/blend-feed/src/parse/feed.rs | 31 +
crates/blend-feed/src/parse/mod.rs | 31 +
crates/blend-feed/src/parse/url.rs | 29 +
crates/blend-feed/src/readability/dom.rs | 116 ++
crates/blend-feed/src/readability/extract.rs | 55 +
crates/blend-feed/src/readability/mod.rs | 7 +
crates/blend-feed/src/readability/score.rs | 293 ++++
crates/blend-feed/src/scrape.rs | 14 +
crates/blend-web/src/context.rs | 4 +-
crates/blend-web/src/router/feed.rs | 25 +-
crates/blend-web/src/router/ws.rs | 6 +-
crates/blend-worker/Cargo.toml | 3 +
crates/blend-worker/src/error.rs | 6 +
crates/blend-worker/src/handler.rs | 95 +-
crates/blend-worker/src/job.rs | 6 +-
crates/blend-worker/src/notification.rs | 28 +-
crates/blend-worker/src/worker.rs | 22 +-
package.json | 4 +-
pnpm-lock.yaml | 1485 ++++++++---------
src/main.rs | 19 +-
tailwind.config.ts | 16 +-
ui/src/components/entry/entry-item.tsx | 73 +-
ui/src/components/entry/entry-list.tsx | 17 +-
ui/src/components/entry/entry-panel.tsx | 16 +-
ui/src/components/entry/entry-view.tsx | 17 +-
ui/src/components/feed/feed-header.tsx | 6 +-
ui/src/components/feed/feed-info.tsx | 4 +-
ui/src/components/feed/feed-item.tsx | 46 +-
ui/src/components/feed/feed-list.tsx | 6 +-
ui/src/components/menus/menu.tsx | 2 +-
ui/src/components/modals/create-feed.tsx | 9 +-
ui/src/components/nav/nav-row.tsx | 6 +-
ui/src/components/nav/nav-view-switcher.tsx | 8 +-
ui/src/components/ui/tooltip.tsx | 2 +-
ui/src/hooks/queries/use-feeds-stats.ts | 3 +-
ui/src/hooks/queries/use-infinite-entries.ts | 43 +-
ui/src/hooks/queries/use-invalidate-feed.ts | 26 +
ui/src/hooks/queries/use-invalidate-stats.ts | 20 +
ui/src/hooks/use-list-nav.ts | 8 +-
ui/src/hooks/use-notifications.ts | 20 +-
...se-filter-params.ts => use-query-state.ts} | 12 +-
ui/src/layouts/base.tsx | 2 +-
ui/src/routes/404.tsx | 20 +-
ui/src/routes/feed.tsx | 28 +-
ui/src/types/bindings.ts | 22 +-
ui/src/utils/entries.ts | 2 +-
65 files changed, 3044 insertions(+), 1200 deletions(-)
delete mode 100644 crates/blend-db/migrations/20240504000000_feeds_stats.sql
create mode 100644 crates/blend-feed/src/extract/html.rs
create mode 100644 crates/blend-feed/src/extract/mod.rs
create mode 100644 crates/blend-feed/src/extract/stylistic.rs
create mode 100644 crates/blend-feed/src/extract/text.rs
create mode 100644 crates/blend-feed/src/parse/entry.rs
create mode 100644 crates/blend-feed/src/parse/feed.rs
create mode 100644 crates/blend-feed/src/parse/mod.rs
create mode 100644 crates/blend-feed/src/parse/url.rs
create mode 100644 crates/blend-feed/src/readability/dom.rs
create mode 100644 crates/blend-feed/src/readability/extract.rs
create mode 100644 crates/blend-feed/src/readability/mod.rs
create mode 100644 crates/blend-feed/src/readability/score.rs
create mode 100644 crates/blend-feed/src/scrape.rs
create mode 100644 ui/src/hooks/queries/use-invalidate-feed.ts
create mode 100644 ui/src/hooks/queries/use-invalidate-stats.ts
rename ui/src/hooks/{use-filter-params.ts => use-query-state.ts} (81%)
diff --git a/Cargo.lock b/Cargo.lock
index 8752bf3e..b072ff2c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -39,6 +39,12 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "aligned-vec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
+
[[package]]
name = "alloc-no-stdlib"
version = "2.0.4"
@@ -60,6 +66,19 @@ version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+[[package]]
+name = "ammonia"
+version = "4.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ab99eae5ee58501ab236beb6f20f6ca39be615267b014899c89b2f0bc18a459"
+dependencies = [
+ "html5ever 0.27.0",
+ "maplit",
+ "once_cell",
+ "tendril",
+ "url",
+]
+
[[package]]
name = "android-tzdata"
version = "0.1.1"
@@ -124,6 +143,29 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+
+[[package]]
+name = "arg_enum_proc_macro"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
[[package]]
name = "argon2"
version = "0.5.3"
@@ -136,6 +178,12 @@ dependencies = [
"password-hash",
]
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
[[package]]
name = "async-compression"
version = "0.4.10"
@@ -157,7 +205,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -178,12 +226,41 @@ dependencies = [
"bytemuck",
]
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
[[package]]
name = "autocfg"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
+[[package]]
+name = "av1-grain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf"
+dependencies = [
+ "anyhow",
+ "arrayvec",
+ "log",
+ "nom",
+ "num-rational",
+ "v_frame",
+]
+
+[[package]]
+name = "avif-serialize"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2"
+dependencies = [
+ "arrayvec",
+]
+
[[package]]
name = "axum"
version = "0.7.5"
@@ -295,6 +372,12 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
+[[package]]
+name = "bit_field"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61"
+
[[package]]
name = "bitflags"
version = "1.3.2"
@@ -310,6 +393,12 @@ dependencies = [
"serde",
]
+[[package]]
+name = "bitstream-io"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e"
+
[[package]]
name = "blake2"
version = "0.10.6"
@@ -382,12 +471,19 @@ dependencies = [
name = "blend-feed"
version = "0.0.0"
dependencies = [
+ "ammonia",
"chrono",
"feed-rs",
+ "futures",
+ "html5ever 0.27.0",
+ "lazy_static",
+ "markup5ever_rcdom",
+ "regex",
"reqwest",
"serde",
"serde_json",
"thiserror",
+ "url",
]
[[package]]
@@ -421,9 +517,11 @@ dependencies = [
name = "blend-worker"
version = "0.0.0"
dependencies = [
+ "base64 0.22.1",
"blend-config",
"blend-db",
"blend-feed",
+ "favilib",
"futures",
"reqwest",
"serde",
@@ -433,6 +531,7 @@ dependencies = [
"tokio",
"tracing",
"typeshare",
+ "url",
"uuid",
]
@@ -466,6 +565,12 @@ dependencies = [
"alloc-stdlib",
]
+[[package]]
+name = "built"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6a6c0b39c38fd754ac338b00a88066436389c0f029da5d37d1e01091d9b7c17"
+
[[package]]
name = "bumpalo"
version = "3.16.0"
@@ -484,6 +589,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+[[package]]
+name = "byteorder-lite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
+
[[package]]
name = "bytes"
version = "1.6.0"
@@ -492,9 +603,24 @@ checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "cc"
-version = "1.0.97"
+version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4"
+checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
+dependencies = [
+ "jobserver",
+ "libc",
+ "once_cell",
+]
+
+[[package]]
+name = "cfg-expr"
+version = "0.15.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02"
+dependencies = [
+ "smallvec",
+ "target-lexicon",
+]
[[package]]
name = "cfg-if"
@@ -548,7 +674,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -557,6 +683,12 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
+[[package]]
+name = "color_quant"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+
[[package]]
name = "colorchoice"
version = "1.0.1"
@@ -620,6 +752,34 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
+[[package]]
+name = "crc32fast"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
[[package]]
name = "crossbeam-queue"
version = "0.3.11"
@@ -631,9 +791,15 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
-version = "0.8.19"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "crypto-common"
@@ -645,6 +811,29 @@ dependencies = [
"typenum",
]
+[[package]]
+name = "cssparser"
+version = "0.31.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be"
+dependencies = [
+ "cssparser-macros",
+ "dtoa-short",
+ "itoa",
+ "phf 0.11.2",
+ "smallvec",
+]
+
+[[package]]
+name = "cssparser-macros"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
+dependencies = [
+ "quote",
+ "syn 2.0.66",
+]
+
[[package]]
name = "darling"
version = "0.20.9"
@@ -666,7 +855,7 @@ dependencies = [
"proc-macro2",
"quote",
"strsim",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -677,7 +866,7 @@ checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
dependencies = [
"darling_core",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -706,6 +895,17 @@ dependencies = [
"powerfmt",
]
+[[package]]
+name = "derive_more"
+version = "0.99.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
[[package]]
name = "digest"
version = "0.10.7"
@@ -754,6 +954,27 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+[[package]]
+name = "dtoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653"
+
+[[package]]
+name = "dtoa-short"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74"
+dependencies = [
+ "dtoa",
+]
+
+[[package]]
+name = "ego-tree"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
+
[[package]]
name = "either"
version = "1.12.0"
@@ -805,12 +1026,52 @@ version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
+[[package]]
+name = "exr"
+version = "1.72.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4"
+dependencies = [
+ "bit_field",
+ "flume",
+ "half",
+ "lebe",
+ "miniz_oxide",
+ "rayon-core",
+ "smallvec",
+ "zune-inflate",
+]
+
[[package]]
name = "fastrand"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
+[[package]]
+name = "favilib"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7556cbc1d9d8a63a1b0c5956e3a1f27e9c4a22862899fadbb56f0c41ef4d952"
+dependencies = [
+ "anyhow",
+ "clap",
+ "image",
+ "reqwest",
+ "scraper",
+ "thiserror",
+ "url",
+]
+
+[[package]]
+name = "fdeflate"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645"
+dependencies = [
+ "simd-adler32",
+]
+
[[package]]
name = "feed-rs"
version = "1.5.2"
@@ -823,16 +1084,16 @@ dependencies = [
"regex",
"serde",
"serde_json",
- "siphasher",
+ "siphasher 1.0.1",
"url",
"uuid",
]
[[package]]
name = "figment"
-version = "0.10.18"
+version = "0.10.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d032832d74006f99547004d49410a4b4218e4c33382d56ca3ff89df74f86b953"
+checksum = "8cb01cd46b0cf372153850f4c6c272d9cbea2da513e07538405148f95bd789f3"
dependencies = [
"atomic",
"pear",
@@ -843,10 +1104,14 @@ dependencies = [
]
[[package]]
-name = "finl_unicode"
-version = "1.2.0"
+name = "flate2"
+version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
+checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
[[package]]
name = "flume"
@@ -889,6 +1154,16 @@ dependencies = [
"percent-encoding",
]
+[[package]]
+name = "futf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
+dependencies = [
+ "mac",
+ "new_debug_unreachable",
+]
+
[[package]]
name = "futures"
version = "0.3.30"
@@ -956,7 +1231,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -989,6 +1264,15 @@ dependencies = [
"slab",
]
+[[package]]
+name = "fxhash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
+dependencies = [
+ "byteorder",
+]
+
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -999,17 +1283,36 @@ dependencies = [
"version_check",
]
+[[package]]
+name = "getopts"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
+dependencies = [
+ "unicode-width",
+]
+
[[package]]
name = "getrandom"
-version = "0.2.14"
+version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
+[[package]]
+name = "gif"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2"
+dependencies = [
+ "color_quant",
+ "weezl",
+]
+
[[package]]
name = "gimli"
version = "0.28.1"
@@ -1018,15 +1321,15 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "h2"
-version = "0.4.4"
+version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069"
+checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab"
dependencies = [
+ "atomic-waker",
"bytes",
"fnv",
"futures-core",
"futures-sink",
- "futures-util",
"http",
"indexmap",
"slab",
@@ -1035,6 +1338,16 @@ dependencies = [
"tracing",
]
+[[package]]
+name = "half"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
[[package]]
name = "hashbrown"
version = "0.14.5"
@@ -1108,6 +1421,34 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "html5ever"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.11.0",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "html5ever"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.12.1",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
[[package]]
name = "http"
version = "1.1.0"
@@ -1199,9 +1540,9 @@ dependencies = [
[[package]]
name = "hyper-util"
-version = "0.1.3"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
+checksum = "3d8d52be92d09acc2e01dddb7fde3ad983fc6489c7db4837e605bc3fca4cb63e"
dependencies = [
"bytes",
"futures-channel",
@@ -1256,6 +1597,45 @@ dependencies = [
"unicode-normalization",
]
+[[package]]
+name = "image"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11"
+dependencies = [
+ "bytemuck",
+ "byteorder",
+ "color_quant",
+ "exr",
+ "gif",
+ "image-webp",
+ "num-traits",
+ "png",
+ "qoi",
+ "ravif",
+ "rayon",
+ "rgb",
+ "tiff",
+ "zune-core",
+ "zune-jpeg",
+]
+
+[[package]]
+name = "image-webp"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d"
+dependencies = [
+ "byteorder-lite",
+ "thiserror",
+]
+
+[[package]]
+name = "imgref"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"
+
[[package]]
name = "indexmap"
version = "2.2.6"
@@ -1272,6 +1652,17 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
+[[package]]
+name = "interpolate_name"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
[[package]]
name = "ipnet"
version = "2.9.0"
@@ -1299,6 +1690,21 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
+[[package]]
+name = "jobserver"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "jpeg-decoder"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"
+
[[package]]
name = "js-sys"
version = "0.3.69"
@@ -1332,11 +1738,28 @@ dependencies = [
"spin 0.5.2",
]
+[[package]]
+name = "lebe"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
+
[[package]]
name = "libc"
-version = "0.2.153"
+version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
[[package]]
name = "libm"
@@ -1367,9 +1790,9 @@ dependencies = [
[[package]]
name = "linux-raw-sys"
-version = "0.4.13"
+version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
+checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "lock_api"
@@ -1387,12 +1810,83 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+[[package]]
+name = "loop9"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
+dependencies = [
+ "imgref",
+]
+
+[[package]]
+name = "mac"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+
+[[package]]
+name = "maplit"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
+
+[[package]]
+name = "markup5ever"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
+dependencies = [
+ "log",
+ "phf 0.10.1",
+ "phf_codegen 0.10.0",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]
+
+[[package]]
+name = "markup5ever"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
+dependencies = [
+ "log",
+ "phf 0.11.2",
+ "phf_codegen 0.11.2",
+ "string_cache",
+ "string_cache_codegen",
+ "tendril",
+]
+
+[[package]]
+name = "markup5ever_rcdom"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
+dependencies = [
+ "html5ever 0.27.0",
+ "markup5ever 0.12.1",
+ "tendril",
+ "xml5ever",
+]
+
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+[[package]]
+name = "maybe-rayon"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
+dependencies = [
+ "cfg-if",
+ "rayon",
+]
+
[[package]]
name = "md-5"
version = "0.10.6"
@@ -1433,11 +1927,12 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
-version = "0.7.2"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae"
dependencies = [
"adler",
+ "simd-adler32",
]
[[package]]
@@ -1469,6 +1964,12 @@ dependencies = [
"tempfile",
]
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
[[package]]
name = "nom"
version = "7.1.3"
@@ -1479,6 +1980,12 @@ dependencies = [
"minimal-lexical",
]
+[[package]]
+name = "noop_proc_macro"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
+
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
@@ -1489,6 +1996,16 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "num-bigint"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
[[package]]
name = "num-bigint-dig"
version = "0.8.4"
@@ -1512,6 +2029,17 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
[[package]]
name = "num-integer"
version = "0.1.46"
@@ -1522,12 +2050,23 @@ dependencies = [
]
[[package]]
-name = "num-iter"
-version = "0.1.45"
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
- "autocfg",
+ "num-bigint",
"num-integer",
"num-traits",
]
@@ -1590,7 +2129,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -1601,9 +2140,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-src"
-version = "300.2.3+3.2.1"
+version = "300.3.0+3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cff92b6f71555b61bb9315f7c64da3ca43d87531622120fea0195fc761b4843"
+checksum = "eba8804a1c5765b18c4b3f907e6897ebabeedebc9830e1a0046c4a4cf44663e1"
dependencies = [
"cc",
]
@@ -1635,9 +2174,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
-version = "0.12.2"
+version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
+checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
@@ -1693,7 +2232,7 @@ dependencies = [
"proc-macro2",
"proc-macro2-diagnostics",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -1711,6 +2250,96 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+[[package]]
+name = "phf"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
+dependencies = [
+ "phf_shared 0.10.0",
+]
+
+[[package]]
+name = "phf"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
+dependencies = [
+ "phf_macros",
+ "phf_shared 0.11.2",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
+dependencies = [
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
+dependencies = [
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
+dependencies = [
+ "phf_shared 0.10.0",
+ "rand",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
+dependencies = [
+ "phf_shared 0.11.2",
+ "rand",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
+dependencies = [
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.66",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
+dependencies = [
+ "siphasher 0.3.11",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b"
+dependencies = [
+ "siphasher 0.3.11",
+]
+
[[package]]
name = "pin-project"
version = "1.1.5"
@@ -1728,7 +2357,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -1770,6 +2399,19 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
+[[package]]
+name = "png"
+version = "0.17.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1"
+dependencies = [
+ "bitflags 1.3.2",
+ "crc32fast",
+ "fdeflate",
+ "flate2",
+ "miniz_oxide",
+]
+
[[package]]
name = "powerfmt"
version = "0.2.0"
@@ -1782,6 +2424,12 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+[[package]]
+name = "precomputed-hash"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
+
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@@ -1808,9 +2456,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.82"
+version = "1.0.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b"
+checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6"
dependencies = [
"unicode-ident",
]
@@ -1823,11 +2471,45 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
"version_check",
"yansi",
]
+[[package]]
+name = "profiling"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58"
+dependencies = [
+ "profiling-procmacros",
+]
+
+[[package]]
+name = "profiling-procmacros"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
+dependencies = [
+ "quote",
+ "syn 2.0.66",
+]
+
+[[package]]
+name = "qoi"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
[[package]]
name = "quick-xml"
version = "0.31.0"
@@ -1877,6 +2559,76 @@ dependencies = [
"getrandom",
]
+[[package]]
+name = "rav1e"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9"
+dependencies = [
+ "arbitrary",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "av1-grain",
+ "bitstream-io",
+ "built",
+ "cfg-if",
+ "interpolate_name",
+ "itertools",
+ "libc",
+ "libfuzzer-sys",
+ "log",
+ "maybe-rayon",
+ "new_debug_unreachable",
+ "noop_proc_macro",
+ "num-derive",
+ "num-traits",
+ "once_cell",
+ "paste",
+ "profiling",
+ "rand",
+ "rand_chacha",
+ "simd_helpers",
+ "system-deps",
+ "thiserror",
+ "v_frame",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "ravif"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234"
+dependencies = [
+ "avif-serialize",
+ "imgref",
+ "loop9",
+ "quick-error",
+ "rav1e",
+ "rayon",
+ "rgb",
+]
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
[[package]]
name = "redox_syscall"
version = "0.4.1"
@@ -1944,6 +2696,7 @@ dependencies = [
"base64 0.22.1",
"bytes",
"encoding_rs",
+ "futures-channel",
"futures-core",
"futures-util",
"h2",
@@ -1977,6 +2730,15 @@ dependencies = [
"winreg",
]
+[[package]]
+name = "rgb"
+version = "0.8.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
+dependencies = [
+ "bytemuck",
+]
+
[[package]]
name = "rsa"
version = "0.9.6"
@@ -2018,7 +2780,7 @@ dependencies = [
"quote",
"rust-embed-utils",
"shellexpand",
- "syn 2.0.64",
+ "syn 2.0.66",
"walkdir",
]
@@ -2103,6 +2865,22 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+[[package]]
+name = "scraper"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b80b33679ff7a0ea53d37f3b39de77ea0c75b12c5805ac43ec0c33b3051af1b"
+dependencies = [
+ "ahash",
+ "cssparser",
+ "ego-tree",
+ "getopts",
+ "html5ever 0.26.0",
+ "once_cell",
+ "selectors",
+ "tendril",
+]
+
[[package]]
name = "security-framework"
version = "2.11.0"
@@ -2126,24 +2904,43 @@ dependencies = [
"libc",
]
+[[package]]
+name = "selectors"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06"
+dependencies = [
+ "bitflags 2.5.0",
+ "cssparser",
+ "derive_more",
+ "fxhash",
+ "log",
+ "new_debug_unreachable",
+ "phf 0.10.1",
+ "phf_codegen 0.10.0",
+ "precomputed-hash",
+ "servo_arc",
+ "smallvec",
+]
+
[[package]]
name = "serde"
-version = "1.0.202"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
+checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
-version = "1.0.202"
+version = "1.0.203"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
+checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -2188,6 +2985,15 @@ dependencies = [
"serde",
]
+[[package]]
+name = "servo_arc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44"
+dependencies = [
+ "stable_deref_trait",
+]
+
[[package]]
name = "sha1"
version = "0.10.6"
@@ -2238,6 +3044,27 @@ dependencies = [
"rand_core",
]
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
+[[package]]
+name = "simd_helpers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
+dependencies = [
+ "quote",
+]
+
+[[package]]
+name = "siphasher"
+version = "0.3.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
+
[[package]]
name = "siphasher"
version = "1.0.1"
@@ -2507,15 +3334,47 @@ dependencies = [
"uuid",
]
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
+[[package]]
+name = "string_cache"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
+dependencies = [
+ "new_debug_unreachable",
+ "once_cell",
+ "parking_lot",
+ "phf_shared 0.10.0",
+ "precomputed-hash",
+ "serde",
+]
+
+[[package]]
+name = "string_cache_codegen"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
+dependencies = [
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
+ "proc-macro2",
+ "quote",
+]
+
[[package]]
name = "stringprep"
-version = "0.1.4"
+version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6"
+checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
dependencies = [
- "finl_unicode",
"unicode-bidi",
"unicode-normalization",
+ "unicode-properties",
]
[[package]]
@@ -2543,9 +3402,9 @@ dependencies = [
[[package]]
name = "syn"
-version = "2.0.64"
+version = "2.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ad3dee41f36859875573074334c200d1add8e4a87bb37113ebd31d926b7b11f"
+checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
dependencies = [
"proc-macro2",
"quote",
@@ -2585,6 +3444,25 @@ dependencies = [
"libc",
]
+[[package]]
+name = "system-deps"
+version = "6.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349"
+dependencies = [
+ "cfg-expr",
+ "heck 0.5.0",
+ "pkg-config",
+ "toml",
+ "version-compare",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
+
[[package]]
name = "tempfile"
version = "3.10.1"
@@ -2597,24 +3475,35 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "tendril"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
+dependencies = [
+ "futf",
+ "mac",
+ "utf-8",
+]
+
[[package]]
name = "thiserror"
-version = "1.0.60"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
-version = "1.0.60"
+version = "1.0.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -2627,6 +3516,17 @@ dependencies = [
"once_cell",
]
+[[package]]
+name = "tiff"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e"
+dependencies = [
+ "flate2",
+ "jpeg-decoder",
+ "weezl",
+]
+
[[package]]
name = "time"
version = "0.3.36"
@@ -2698,7 +3598,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -2873,7 +3773,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -2962,7 +3862,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a615d6c2764852a2e88a4f16e9ce1ea49bb776b5872956309e170d63a042a34f"
dependencies = [
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -3004,12 +3904,24 @@ dependencies = [
"tinyvec",
]
+[[package]]
+name = "unicode-properties"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291"
+
[[package]]
name = "unicode-segmentation"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
+[[package]]
+name = "unicode-width"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6"
+
[[package]]
name = "unicode_categories"
version = "0.1.1"
@@ -3056,6 +3968,17 @@ dependencies = [
"serde",
]
+[[package]]
+name = "v_frame"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b"
+dependencies = [
+ "aligned-vec",
+ "num-traits",
+ "wasm-bindgen",
+]
+
[[package]]
name = "validator"
version = "0.18.1"
@@ -3083,7 +4006,7 @@ dependencies = [
"proc-macro-error",
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
@@ -3098,6 +4021,12 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+[[package]]
+name = "version-compare"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b"
+
[[package]]
name = "version_check"
version = "0.9.4"
@@ -3156,7 +4085,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
"wasm-bindgen-shared",
]
@@ -3190,7 +4119,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@@ -3211,6 +4140,12 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "weezl"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
+
[[package]]
name = "whoami"
version = "1.5.1"
@@ -3419,6 +4354,17 @@ dependencies = [
"windows-sys 0.48.0",
]
+[[package]]
+name = "xml5ever"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c376f76ed09df711203e20c3ef5ce556f0166fa03d39590016c0fd625437fad"
+dependencies = [
+ "log",
+ "mac",
+ "markup5ever 0.12.1",
+]
+
[[package]]
name = "yansi"
version = "1.0.1"
@@ -3442,11 +4388,35 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
dependencies = [
"proc-macro2",
"quote",
- "syn 2.0.64",
+ "syn 2.0.66",
]
[[package]]
name = "zeroize"
-version = "1.7.0"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
+
+[[package]]
+name = "zune-core"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
+
+[[package]]
+name = "zune-inflate"
+version = "0.2.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
+checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448"
+dependencies = [
+ "zune-core",
+]
diff --git a/Cargo.toml b/Cargo.toml
index df6afd94..565aee46 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,6 +22,7 @@ thiserror = "1.0"
tokio = { version = "1.37", features = ["macros", "rt-multi-thread"] }
tracing = "0.1"
typeshare = "1.0"
+url = "2.5"
uuid = { version = "1.8", features = ["v4", "fast-rng", "serde"] }
[package]
diff --git a/README.md b/README.md
index 63247309..5d4772a2 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Demo is available at [blend.zak.fm](https://blend.zak.fm).
- [x] Fetch metadata and feed entries in background
- [x] Notifications via websocket
- [x] Automatic + manual refreshing
-- [ ] Scrape HTML if entries do not contain article content
+- [x] Scrape HTML if entries do not contain article content
- [ ] Organize feeds into folders
- [ ] UI options for theme, font, etc.
- [ ] Keyboard shortcuts
diff --git a/crates/blend-db/migrations/20240422000000_feeds.sql b/crates/blend-db/migrations/20240422000000_feeds.sql
index 3273f6d2..5a40ebb8 100644
--- a/crates/blend-db/migrations/20240422000000_feeds.sql
+++ b/crates/blend-db/migrations/20240422000000_feeds.sql
@@ -2,9 +2,10 @@ CREATE TABLE IF NOT EXISTS feeds (
uuid TEXT PRIMARY KEY NOT NULL,
id TEXT NOT NULL,
url_feed TEXT NOT NULL,
- url_site TEXT,
- title TEXT,
+ url_site TEXT NOT NULL,
+ title TEXT NOT NULL,
title_display TEXT,
+ favicon_url TEXT,
favicon_b64 BLOB,
published_at DATETIME,
updated_at DATETIME,
diff --git a/crates/blend-db/migrations/20240430000000_entries.sql b/crates/blend-db/migrations/20240430000000_entries.sql
index 770a116f..a62eab72 100644
--- a/crates/blend-db/migrations/20240430000000_entries.sql
+++ b/crates/blend-db/migrations/20240430000000_entries.sql
@@ -4,12 +4,15 @@ CREATE TABLE IF NOT EXISTS entries (
id TEXT NOT NULL,
url TEXT,
title TEXT,
- summary TEXT,
+ summary_html TEXT,
content_html TEXT,
content_scraped_html TEXT,
+ media_url TEXT,
published_at DATETIME,
updated_at DATETIME,
read_at DATETIME,
+ saved_at DATETIME,
+ scraped_at DATETIME,
UNIQUE(feed_uuid, id),
CONSTRAINT fk_feed FOREIGN KEY (feed_uuid) REFERENCES feeds(uuid) ON DELETE CASCADE
);
diff --git a/crates/blend-db/migrations/20240504000000_feeds_stats.sql b/crates/blend-db/migrations/20240504000000_feeds_stats.sql
deleted file mode 100644
index fb49de20..00000000
--- a/crates/blend-db/migrations/20240504000000_feeds_stats.sql
+++ /dev/null
@@ -1,15 +0,0 @@
-CREATE VIEW IF NOT EXISTS feeds_stats AS
-SELECT
- feeds.uuid,
- COUNT(entries.uuid) as count_total,
- COUNT(
- CASE
- WHEN entries.read_at IS NULL THEN 1
- ELSE NULL
- END
- ) as count_unread
-FROM
- feeds
- INNER JOIN entries ON feeds.uuid = entries.feed_uuid
-GROUP BY
- feeds.uuid
diff --git a/crates/blend-db/src/model/entry.rs b/crates/blend-db/src/model/entry.rs
index 4833a174..7a9bbc1b 100644
--- a/crates/blend-db/src/model/entry.rs
+++ b/crates/blend-db/src/model/entry.rs
@@ -12,12 +12,15 @@ pub struct Entry {
pub id: String,
pub url: String,
pub title: Option,
- pub summary: Option,
+ pub summary_html: Option,
#[sqlx(default)]
pub content_html: Option,
#[sqlx(default)]
pub content_scraped_html: Option,
+ pub media_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
pub read_at: Option>,
+ pub saved_at: Option>,
+ pub scraped_at: Option>,
}
diff --git a/crates/blend-db/src/model/feed.rs b/crates/blend-db/src/model/feed.rs
index 29949d9e..c443c793 100644
--- a/crates/blend-db/src/model/feed.rs
+++ b/crates/blend-db/src/model/feed.rs
@@ -10,10 +10,11 @@ pub struct Feed {
pub uuid: Uuid,
pub id: String,
pub url_feed: String,
- pub url_site: Option,
- pub title: Option,
+ pub url_site: String,
+ pub title: String,
pub title_display: Option,
- pub favicon_b64: Option>,
+ pub favicon_b64: Option,
+ pub favicon_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
}
@@ -24,4 +25,5 @@ pub struct FeedStats {
pub uuid: Uuid,
pub count_total: u32,
pub count_unread: u32,
+ pub count_saved: u32,
}
diff --git a/crates/blend-db/src/repo/entry.rs b/crates/blend-db/src/repo/entry.rs
index 12e61e2d..8502eb08 100644
--- a/crates/blend-db/src/repo/entry.rs
+++ b/crates/blend-db/src/repo/entry.rs
@@ -14,8 +14,9 @@ pub struct CreateEntryParams {
pub id: String,
pub url: Option,
pub title: Option,
- pub summary: Option,
+ pub summary_html: Option,
pub content_html: Option,
+ pub media_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
}
@@ -86,7 +87,7 @@ impl EntryRepo {
let el = filter.sort.query_elements();
let el_inv = filter.sort.query_elements_inverse();
- let mut query = QueryBuilder::::new("SELECT uuid, feed_uuid, id, url, title, summary, published_at, updated_at, read_at FROM entries WHERE 1=1");
+ let mut query = QueryBuilder::::new("SELECT uuid, feed_uuid, id, url, title, summary_html, media_url, published_at, updated_at, read_at, saved_at, scraped_at FROM entries WHERE 1=1");
match filter.view {
View::All => query.push(""),
@@ -146,6 +147,17 @@ impl EntryRepo {
.map_err(|err| err.into())
}
+ pub async fn get_entries_to_scrape(
+ &self,
+ feed_uuid: &uuid::Uuid,
+ ) -> DbResult> {
+ sqlx::query_as::<_, model::Entry>("SELECT * FROM entries WHERE feed_uuid = ?1 AND content_html IS NULL AND content_scraped_html IS NULL AND scraped_at IS NULL")
+ .bind(feed_uuid)
+ .fetch_all(&self.db)
+ .await
+ .map_err(|err| err.into())
+ }
+
pub async fn update_entry_as_read(&self, entry_uuid: &uuid::Uuid) -> DbResult {
let rows_affected = sqlx::query("UPDATE entries SET read_at = ?1 WHERE uuid = ?2")
.bind(Utc::now())
@@ -176,15 +188,16 @@ impl EntryRepo {
return Ok(vec![]);
}
- let mut query = QueryBuilder::::new("INSERT INTO entries (feed_uuid, uuid, id, url, title, summary, content_html, published_at, updated_at) ");
+ let mut query = QueryBuilder::::new("INSERT INTO entries (feed_uuid, uuid, id, url, title, summary_html, content_html, media_url, published_at, updated_at) ");
query.push_values(entries.iter(), |mut b, entry| {
b.push_bind(feed_uuid)
.push_bind(uuid::Uuid::new_v4())
.push_bind(entry.id.clone())
.push_bind(entry.url.clone())
.push_bind(entry.title.clone())
- .push_bind(entry.summary.clone())
+ .push_bind(entry.summary_html.clone())
.push_bind(entry.content_html.clone())
+ .push_bind(entry.media_url.clone())
.push_bind(entry.published_at)
.push_bind(entry.updated_at);
});
@@ -194,7 +207,7 @@ impl EntryRepo {
DO UPDATE SET
url = excluded.url,
title = excluded.title,
- summary = excluded.summary,
+ summary_html = excluded.summary_html,
content_html = excluded.content_html,
updated_at = excluded.updated_at
RETURNING uuid
@@ -209,4 +222,22 @@ impl EntryRepo {
.map(|row| row.try_get("uuid").map_err(|err| err.into()))
.collect::>>()
}
+
+ pub async fn update_scraped_entry(
+ &self,
+ entry_uuid: &uuid::Uuid,
+ content_scraped_html: Option,
+ ) -> DbResult {
+ let rows_affected = sqlx::query(
+ "UPDATE entries SET content_scraped_html = ?1, scraped_at = ?2 WHERE uuid = ?3",
+ )
+ .bind(content_scraped_html)
+ .bind(Utc::now())
+ .bind(entry_uuid)
+ .execute(&self.db)
+ .await?
+ .rows_affected();
+
+ Ok(rows_affected > 0)
+ }
}
diff --git a/crates/blend-db/src/repo/feed.rs b/crates/blend-db/src/repo/feed.rs
index ff5e29de..4ab32fca 100644
--- a/crates/blend-db/src/repo/feed.rs
+++ b/crates/blend-db/src/repo/feed.rs
@@ -7,8 +7,10 @@ pub struct FeedRepo {
pub struct CreateFeedParams {
pub id: String,
- pub title: Option,
- pub url_feed: Option,
+ pub title: String,
+ pub url_feed: String,
+ pub url_site: String,
+ pub favicon_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
}
@@ -26,10 +28,21 @@ impl FeedRepo {
}
pub async fn get_stats(&self) -> DbResult> {
- sqlx::query_as::<_, model::FeedStats>("SELECT * from feeds_stats")
- .fetch_all(&self.db)
- .await
- .map_err(|err| err.into())
+ sqlx::query_as::<_, model::FeedStats>(
+ r#"
+ SELECT
+ feeds.uuid,
+ COUNT(entries.uuid) as count_total,
+ COUNT(CASE WHEN entries.read_at IS NULL THEN 1 ELSE NULL END) as count_unread,
+ COUNT(CASE WHEN entries.saved_at IS NOT NULL THEN 1 ELSE NULL END) as count_saved
+ FROM feeds
+ INNER JOIN entries ON feeds.uuid = entries.feed_uuid
+ GROUP BY feeds.uuid
+ "#,
+ )
+ .fetch_all(&self.db)
+ .await
+ .map_err(|err| err.into())
}
pub async fn get_feed(&self, feed_uuid: uuid::Uuid) -> DbResult
"#;
+
+ let parsed = extract_stylistic_html(src);
+ assert_eq!(parsed, r#"Text with div and span tags."#);
+ }
+
+ #[test]
+ fn it_allows_a_tags() {
+ let src = r#"Text with link inside.
"#;
+
+ let parsed = extract_stylistic_html(src);
+ assert_eq!(
+ parsed,
+ r#"Text with link inside."#
+ );
+ }
+}
diff --git a/crates/blend-feed/src/extract/text.rs b/crates/blend-feed/src/extract/text.rs
new file mode 100644
index 00000000..3db7ec47
--- /dev/null
+++ b/crates/blend-feed/src/extract/text.rs
@@ -0,0 +1,46 @@
+use ammonia::Builder;
+
+/// Sanitize HTML input, allowing only plain text
+pub fn extract_text(src: &str) -> String {
+ Builder::empty().clean(src).to_string()
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn it_extracts_only_stylistic_elements() {
+ let src = r#"
Some body text that we want to keep.
[Read More]
"#;
+
+ let parsed = extract_text(src);
+ assert_eq!(
+ parsed,
+ r#"Some body text that we want to keep. [Read More]"#
+ );
+ }
+
+ #[test]
+ fn it_allows_strong_and_b_tags() {
+ let src = r#"Text with strong and bold tags.
"#;
+
+ let parsed = extract_text(src);
+ assert_eq!(parsed, r#"Text with strong and bold tags."#);
+ }
+
+ #[test]
+ fn it_flattens_span_and_div() {
+ let src = r#"Text with
div
and span
tags."#;
+
+ let parsed = extract_text(src);
+ assert_eq!(parsed, r#"Text with div and span tags."#);
+ }
+
+ #[test]
+ fn it_removes_a_tags() {
+ let src = r#"Text with link inside.
"#;
+
+ let parsed = extract_text(src);
+ assert_eq!(parsed, r#"Text with link inside."#);
+ }
+}
diff --git a/crates/blend-feed/src/lib.rs b/crates/blend-feed/src/lib.rs
index 6db02c62..3244ffd9 100644
--- a/crates/blend-feed/src/lib.rs
+++ b/crates/blend-feed/src/lib.rs
@@ -1,40 +1,17 @@
-use error::FeedResult;
+#[macro_use]
+extern crate lazy_static;
mod error;
-pub mod model;
-
pub use error::FeedError as Error;
-pub async fn parse_feed(url: &str) -> FeedResult {
- let data = reqwest::get(url).await?.text().await?;
- let mut feed: model::ParsedFeed = feed_rs::parser::parse(data.as_bytes())
- .or_else(|err| match err {
- feed_rs::parser::ParseFeedError::ParseError(_) => todo!(), // fallback to URL and look for feed URL in
- _ => Err(err),
- })?
- .into();
-
- // TODO: need a nicer way of doing this
- if feed.url.is_none() {
- feed.url = Some(url.to_owned());
- }
-
- Ok(feed)
-}
+pub mod model;
+mod readability;
-pub async fn parse_entries(url: &str) -> FeedResult> {
- let data = reqwest::get(url).await?.text().await?;
- let feed = feed_rs::parser::parse(data.as_bytes()).or_else(|err| match err {
- feed_rs::parser::ParseFeedError::ParseError(_) => todo!(),
- _ => Err(err),
- })?;
+mod scrape;
+pub use scrape::scrape_entry;
- let entries = feed
- .entries
- .iter()
- .cloned()
- .map(|entry| entry.into())
- .collect::>();
+mod parse;
+pub use parse::{entry::*, feed::*, url::*};
- Ok(entries)
-}
+mod extract;
+pub use extract::{extract_html, extract_stylistic_html, extract_text};
diff --git a/crates/blend-feed/src/model.rs b/crates/blend-feed/src/model.rs
index bb9e542e..0340905d 100644
--- a/crates/blend-feed/src/model.rs
+++ b/crates/blend-feed/src/model.rs
@@ -1,54 +1,26 @@
use chrono::{DateTime, Utc};
+/// Intermediate model for mapping/tweaking feeds from feed-rs.
#[derive(Debug, Clone)]
pub struct ParsedFeed {
pub id: String,
+ pub url_feed: String,
+ pub url_site: String,
pub title: Option,
- pub url: Option,
- pub favicon: Option,
+ pub favicon_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
}
-impl From for ParsedFeed {
- fn from(value: feed_rs::model::Feed) -> Self {
- Self {
- id: value.id,
- url: None, // TODO: normalize a way of getting this URL, should fallback sanitized user-submitted URL
- title: value.title.map(|title| title.content),
- favicon: None,
- published_at: value.published,
- updated_at: value.updated,
- }
- }
-}
-
+/// Intermediate model for mapping/tweaking entries from feed-rs.
#[derive(Debug, Clone)]
pub struct ParsedEntry {
pub id: String,
pub url: Option,
pub title: Option,
- pub summary: Option,
+ pub summary_html: Option,
pub content_html: Option,
+ pub media_url: Option,
pub published_at: Option>,
pub updated_at: Option>,
}
-
-impl From for ParsedEntry {
- fn from(value: feed_rs::model::Entry) -> Self {
- let link = value
- .links
- .iter()
- .find(|link| link.rel.as_ref().is_some_and(|rel| rel == "self"));
-
- Self {
- id: value.id,
- url: link.map(|link| link.href.clone()), // TODO: normalize this url
- title: value.title.map(|title| title.content),
- summary: value.summary.map(|summary| summary.content),
- content_html: value.content.and_then(|content| content.body), // TODO: normalize and sanitize this
- published_at: value.published,
- updated_at: value.updated,
- }
- }
-}
diff --git a/crates/blend-feed/src/parse/entry.rs b/crates/blend-feed/src/parse/entry.rs
new file mode 100644
index 00000000..7d7979d4
--- /dev/null
+++ b/crates/blend-feed/src/parse/entry.rs
@@ -0,0 +1,46 @@
+use super::get_feed;
+use crate::{
+ error::{FeedError, FeedResult},
+ extract::*,
+ model::{self, ParsedEntry},
+ parse_url,
+};
+
+/// Fetch the feed at `url` and convert each of its entries into a `ParsedEntry`
+pub async fn parse_entries(url: &str) -> FeedResult> {
+ let url = parse_url(url).ok_or_else(|| FeedError::InvalidUrl(url.to_string()))?;
+
+ // Fetch and parse the feed; the URL that was actually used is not needed here
+ let (feed, _) = get_feed(url).await?;
+
+ let entries = feed
+ .entries
+ .iter()
+ .cloned()
+ .map(|entry| {
+ // TODO: Find the best link somehow? Maybe not always the first?
+ let url = entry.links.first().map(|link| link.href.clone());
+
+ let media_url = entry
+ .media
+ .first()
+ .and_then(|media| media.content.first().and_then(|content| content.url.clone()))
+ .map(|url| url.to_string());
+
+ ParsedEntry {
+ id: entry.id,
+ url,
+ title: entry.title.map(|text| extract_text(&text.content)),
+ summary_html: entry.summary.map(|text| extract_stylistic_html(&text.content)),
+ content_html: entry
+ .content
+ .and_then(|content| content.body.map(|content| extract_html(&content))),
+ media_url,
+ published_at: entry.published,
+ updated_at: entry.updated,
+ }
+ })
+ .collect::>();
+
+ Ok(entries)
+}
diff --git a/crates/blend-feed/src/parse/feed.rs b/crates/blend-feed/src/parse/feed.rs
new file mode 100644
index 00000000..e549398a
--- /dev/null
+++ b/crates/blend-feed/src/parse/feed.rs
@@ -0,0 +1,31 @@
+use super::get_feed;
+use crate::{
+ error::{FeedError, FeedResult},
+ extract::*,
+ model::ParsedFeed,
+ parse_url,
+};
+
+/// Fetch the feed at `url` and map its basic metadata into a `ParsedFeed`
+pub async fn parse_feed(url: &str) -> FeedResult {
+ let url = parse_url(url).ok_or_else(|| FeedError::InvalidUrl(url.to_string()))?;
+ let url_feed = url.base.clone(); // NOTE: despite the name, this holds the base (site) URL; it is stored as `url_site` below
+
+ // Parse feed and get the URL that was actually used to fetch it
+ let (feed, url) = get_feed(url.clone()).await?;
+
+ // Use the feed's icon, falling back to its logo, as the favicon URL until we can convert the remote image into binary data stored in the db
+ let favicon_url = feed.icon.or_else(|| feed.logo).map(|image| image.uri);
+
+ let parsed = ParsedFeed {
+ id: feed.id,
+ url_feed: url,
+ url_site: url_feed,
+ title: feed.title.map(|text| extract_text(&text.content)),
+ favicon_url,
+ published_at: feed.published,
+ updated_at: feed.updated,
+ };
+
+ Ok(parsed)
+}
diff --git a/crates/blend-feed/src/parse/mod.rs b/crates/blend-feed/src/parse/mod.rs
new file mode 100644
index 00000000..1d14a563
--- /dev/null
+++ b/crates/blend-feed/src/parse/mod.rs
@@ -0,0 +1,31 @@
+use self::url::ParsedUrl;
+use crate::error::{FeedError, FeedResult};
+
+pub(crate) mod entry;
+pub(crate) mod feed;
+pub(crate) mod url;
+
+ /// Fetch and parse a feed, preferring the trimmed URL and falling back to the untrimmed one.
+ ///
+ /// The string in the returned tuple is the URL whose response was parsed successfully.
+ async fn get_feed(url: ParsedUrl) -> FeedResult<(feed_rs::model::Feed, String)> {
+     let ParsedUrl {
+         url: untrimmed,
+         trimmed,
+         base,
+     } = url;
+
+     // The trimmed URL goes first (we want to keep the saved URLs as clean as possible)
+     let first_attempt = parse_feed_from_url(&trimmed, &base).await;
+
+     // Only a feed parse error specifically warrants a second attempt; a success or any other
+     // kind of failure is handed back unchanged
+     let is_parse_error = matches!(
+         first_attempt,
+         Err(FeedError::ParseFeedError(
+             feed_rs::parser::ParseFeedError::ParseError(_)
+         ))
+     );
+     if !is_parse_error {
+         return first_attempt.map(|feed| (feed, trimmed));
+     }
+
+     // Retry the parsing once more, this time with the untrimmed URL
+     let feed = parse_feed_from_url(&untrimmed, &base).await?;
+     Ok((feed, untrimmed))
+ }
+
+ /// Download the document at `url` and parse it as a feed.
+ ///
+ /// `base_url` is handed to the parser as the feed's base URI.
+ ///
+ /// # Errors
+ ///
+ /// Propagates request, body-reading and feed-parsing failures via `?`.
+ async fn parse_feed_from_url(url: &str, base_url: &str) -> FeedResult<feed_rs::model::Feed> {
+     // NOTE(review): the HTTP status is not inspected here, so a non-feed response body
+     // (e.g. an error page) presumably surfaces as a feed parse error — confirm this is intended
+     let data = reqwest::get(url).await?.text().await?;
+     let feed = feed_rs::parser::parse_with_uri(data.as_bytes(), Some(base_url))?;
+
+     Ok(feed)
+ }
diff --git a/crates/blend-feed/src/parse/url.rs b/crates/blend-feed/src/parse/url.rs
new file mode 100644
index 00000000..509b7951
--- /dev/null
+++ b/crates/blend-feed/src/parse/url.rs
@@ -0,0 +1,29 @@
+use url::Url;
+
+ /// The string forms of a URL produced by `parse_url`.
+ #[derive(Debug, Clone)]
+ pub struct ParsedUrl {
+ /// The full URL as serialized by `Url::to_string`.
+ pub url: String,
+ /// `url` with every trailing `/` removed from the end of the serialized string.
+ pub trimmed: String,
+ /// `scheme://host`, followed by `:port` when `Url::port` reports one.
+ pub base: String,
+ }
+
+ /// Parse `raw_url` into its full, slash-trimmed and base (`scheme://host[:port]`) forms.
+ ///
+ /// Returns `None` when `raw_url` cannot be parsed as a URL or when the parsed URL has no host.
+ pub fn parse_url(raw_url: &str) -> Option<ParsedUrl> {
+     let parsed = Url::parse(raw_url).ok()?;
+     let host = parsed.host_str()?;
+
+     // Construct the base URL from the scheme and domain; the port has to be added manually
+     // and is only present when `Url::port` reports one
+     let base = match parsed.port() {
+         Some(port) => format!("{}://{}:{}", parsed.scheme(), host, port),
+         None => format!("{}://{}", parsed.scheme(), host),
+     };
+
+     let url = parsed.to_string();
+
+     // Trim trailing slashes from the end of the serialized URL
+     let trimmed = url.trim_end_matches('/').to_string();
+
+     Some(ParsedUrl { url, trimmed, base })
+ }
diff --git a/crates/blend-feed/src/readability/dom.rs b/crates/blend-feed/src/readability/dom.rs
new file mode 100644
index 00000000..68b46450
--- /dev/null
+++ b/crates/blend-feed/src/readability/dom.rs
@@ -0,0 +1,116 @@
+#![allow(dead_code)]
+
+use html5ever::tendril::StrTendril;
+use html5ever::Attribute;
+use markup5ever_rcdom::NodeData::{Element, Text};
+use markup5ever_rcdom::{Handle, Node};
+use std::rc::Rc;
+use std::str::FromStr;
+
+ /// Return the lower-cased local tag name of `handle`, or `None` when the node is not an element.
+ pub fn get_tag_name(handle: Handle) -> Option<String> {
+     match handle.data {
+         // `to_lowercase` already yields an owned `String`, so no further conversion is needed
+         Element { ref name, .. } => Some(name.local.as_ref().to_lowercase()),
+         _ => None,
+     }
+ }
+
+ /// Look up the attribute called `name` on `handle`.
+ ///
+ /// Returns `None` when `handle` is not an element or carries no attribute with that name.
+ pub fn get_attr(name: &str, handle: Handle) -> Option<String> {
+     match handle.data {
+         Element { ref attrs, .. } => attr(name, &attrs.borrow()),
+         _ => None,
+     }
+ }
+
+ /// Return the value of the first attribute in `attrs` whose local name equals `attr_name`.
+ ///
+ /// Returns `None` when no attribute matches.
+ pub fn attr(attr_name: &str, attrs: &[Attribute]) -> Option<String> {
+     attrs
+         .iter()
+         .find(|attr| attr.name.local.as_ref() == attr_name)
+         .map(|attr| attr.value.to_string())
+ }
+
+ /// Overwrite the value of the attribute `attr_name` on `handle` with `value`.
+ ///
+ /// Nothing happens when `handle` is not an element, when the element has no attribute with
+ /// that name (the attribute is never created), or when `value` cannot be converted to a tendril.
+ pub fn set_attr(attr_name: &str, value: &str, handle: Handle) {
+     if let Element { ref attrs, .. } = handle.data {
+         let mut attributes = attrs.borrow_mut();
+         let existing = attributes
+             .iter_mut()
+             .find(|candidate| candidate.name.local.as_ref() == attr_name);
+
+         if let Some(existing) = existing {
+             // The attribute keeps its name; only the value is swapped out
+             if let Ok(new_value) = StrTendril::from_str(value) {
+                 existing.value = new_value;
+             }
+         }
+     }
+ }
+
+ /// Append the text found beneath `handle` to `text`.
+ ///
+ /// Text nodes that are direct children are always appended. Element children are only
+ /// descended into when `deep` is set; every other kind of node is ignored.
+ pub fn extract_text(handle: Handle, text: &mut String, deep: bool) {
+     for node in handle.children.borrow().iter() {
+         match &node.data {
+             Text { contents } => text.push_str(contents.borrow().as_ref()),
+             Element { .. } if deep => extract_text(Rc::clone(node), text, deep),
+             _ => {}
+         }
+     }
+ }
+
+ /// Count the characters of all text beneath `handle`, recursing through element children.
+ ///
+ /// Each text node is trimmed of surrounding whitespace before its characters are counted.
+ pub fn text_len(handle: Handle) -> usize {
+     let total: usize = handle
+         .children
+         .borrow()
+         .iter()
+         .map(|node| match &node.data {
+             Text { contents } => contents.borrow().trim().chars().count(),
+             Element { .. } => text_len(Rc::clone(node)),
+             _ => 0,
+         })
+         .sum();
+
+     total
+ }
+
+ /// Collect every element beneath `handle` whose lower-cased tag name equals `tag_name`.
+ ///
+ /// Matches are pushed onto `nodes` parent-first, and the search continues inside each
+ /// element whether or not it matched. `handle` itself is never tested. Because the
+ /// comparison is made against the lower-cased tag name, `tag_name` should be lowercase.
+ pub fn find_node(handle: Handle, tag_name: &str, nodes: &mut Vec<Rc<Node>>) {
+     for child in handle.children.borrow().iter() {
+         if let Element { ref name, .. } = child.data {
+             if name.local.as_ref().to_lowercase() == tag_name {
+                 nodes.push(Rc::clone(child));
+             }
+             find_node(Rc::clone(child), tag_name, nodes);
+         }
+     }
+ }
+
+ /// Report whether any node beneath `handle` has a tag name listed in `tag_names`.
+ ///
+ /// Each child's tag name is checked first (a non-element child is treated as having an
+ /// empty tag name), then element children are searched recursively.
+ pub fn has_nodes(handle: Handle, tag_names: &Vec<&'static str>) -> bool {
+     let children = handle.children.borrow();
+     let found = children.iter().any(|node| {
+         let tag = get_tag_name(Rc::clone(node)).unwrap_or_default();
+         let listed = tag_names.iter().any(|&candidate| candidate == tag);
+
+         // Only elements are descended into
+         listed || (matches!(node.data, Element { .. }) && has_nodes(Rc::clone(node), tag_names))
+     });
+
+     found
+ }
diff --git a/crates/blend-feed/src/readability/extract.rs b/crates/blend-feed/src/readability/extract.rs
new file mode 100644
index 00000000..a79c355d
--- /dev/null
+++ b/crates/blend-feed/src/readability/extract.rs
@@ -0,0 +1,55 @@
+use super::score::{self, Candidate};
+use crate::error::FeedResult;
+use html5ever::{parse_document, tendril::TendrilSink};
+use markup5ever_rcdom::{RcDom, SerializableHandle};
+use std::{cell::Cell, collections::BTreeMap, io::Cursor, path::Path};
+
+ /// A title/content pair; presumably what `extract` produces — confirm against its body.
+ ///
+ /// Because this type is named `Result`, it shadows the prelude's `Result` inside this module.
+ #[derive(Debug)]
+ pub struct Result {
+ /// NOTE(review): presumably the title of the parsed document — confirm.
+ pub title: String,
+ /// NOTE(review): presumably the extracted article content, serialized as a string — confirm.
+ pub content: String,
+ }
+
+pub fn extract(input: &str) -> FeedResult